Example #1
    def __init__(self, length=None, collation=None, **kwargs):
        """
        Construct a VARCHAR.

        :param collation: Optional, a column-level collation for this string
          value.
        """
        String.__init__(self, length, kwargs.get('convert_unicode', False),
            kwargs.get('assert_unicode', None))
        _CollationMixin.__init__(self, collation, **kwargs)
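A minimal usage sketch of the collation parameter documented above, written against the modern public SQLAlchemy API (the import path and table are assumptions, not part of the example):

from sqlalchemy import Column, Integer, MetaData, Table, VARCHAR

metadata = MetaData()

# VARCHAR column carrying an explicit column-level collation
users = Table(
    "users",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("name", VARCHAR(50, collation="utf8mb4_unicode_ci")),
)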
Example #2
class Person(self.Entity):
    has_field('name', String(30))
    has_many('pets', of_kind='Animal')
Example #3
class B(self.Entity):
    name = Field(String(60))
    a = ManyToOne('A', target_column='name')
Example #4
class User(self.Entity):
    name = Field(String(50))
    boston_addresses = OneToMany('Address', filter=lambda c:
                                 c.city == u'Boston')
    addresses = OneToMany('Address')
Example #5
class DbComputer(Base):
    __tablename__ = "db_dbcomputer"

    id = Column(Integer, primary_key=True)

    uuid = Column(UUID(as_uuid=True), default=uuid_func)
    name = Column(String(255), unique=True, nullable=False)
    hostname = Column(String(255))

    description = Column(Text, nullable=True)
    enabled = Column(Boolean)

    transport_type = Column(String(255))
    scheduler_type = Column(String(255))

    transport_params = Column(JSONB)
    _metadata = Column('metadata', JSONB)

    def __init__(self, *args, **kwargs):
        self.enabled = True
        self._metadata = {}
        self.transport_params = {}
        # TODO SP: it's supposed to be nullable, but there is a NOT NULL
        # constraint inside the DB.
        self.description = ""

        super(DbComputer, self).__init__(*args, **kwargs)

    @classmethod
    def get_dbcomputer(cls, computer):
        """
        Return a DbComputer from its name (or from another Computer or DbComputer instance)
        """

        from aiida.orm.computer import Computer
        if isinstance(computer, basestring):
            try:
                dbcomputer = cls.session.query(cls).filter(cls.name == computer).one()
            except NoResultFound:
                raise NotExistent("No computer found in the table of computers with "
                                  "the given name '{}'".format(computer))
            except MultipleResultsFound:
                raise DbContentError("There is more than one computer with name '{}', "
                                     "pass a Computer instance".format(computer))
        elif isinstance(computer, int):
            try:
                dbcomputer = cls.session.query(cls).filter(cls.id == computer).one()
            except NoResultFound:
                raise NotExistent("No computer found in the table of computers with "
                                  "the given id '{}'".format(computer))
        elif isinstance(computer, DbComputer):
            if computer.id is None:
                raise ValueError("The computer instance you are passing has not been stored yet")
            dbcomputer = computer
        elif isinstance(computer, Computer):
            if computer.dbcomputer.id is None:
                raise ValueError("The computer instance you are passing has not been stored yet")
            dbcomputer = computer.dbcomputer
        else:
            raise TypeError("Pass either a computer name, a DbComputer SQLAlchemy instance, a Computer id or a Computer object")
        return dbcomputer

    def get_aiida_class(self):
        from aiida.orm.computer import Computer
        return Computer(dbcomputer=self)

    def get_workdir(self):
        try:
            return self._metadata['workdir']
        except KeyError:
            raise ConfigurationError('No workdir found for DbComputer {} '.format(
                self.name))

    @property
    def pk(self):
        return self.id

    def __str__(self):
        if self.enabled:
            return "{} ({})".format(self.name, self.hostname)
        else:
            return "{} ({}) [DISABLED]".format(self.name, self.hostname)
Example #6
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.types import Date, Float, Integer, String
from datetime import date

today = date.today().strftime('%d_%m_%Y')

engine = create_engine(
    "mysql+pymysql://root:root@localhost/corona")  # the 'corona' database
engine.connect()  # connect to the database

df_europe = pd.read_csv("donnees_world_covidvirus_{}.csv".format(today))
df_europe['Date'] = pd.to_datetime(df_europe['Date'])

df_europe.to_sql('full_data',
                 con=engine,
                 if_exists='replace',
                 chunksize=1000,
                 index=False,
                 dtype={
                     "Date": Date,
                     "Country": String(22),
                     "Lat": Float,
                     "Long": Float,
                     "Confirmed": Integer,
                     "Deaths": Integer,
                     "Recovered": Integer,
                     "Code": String(4),
                     "Continent": String(7),
                     "Region": String(20),
                     "Population": Integer,
                     "LifeExpectancy": Float
                 })
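To confirm the load and the dtype mapping, the table can be read back through the same engine; a minimal check:

# Read a few rows back from the freshly written table.
df_check = pd.read_sql('SELECT * FROM full_data LIMIT 5', con=engine)
print(df_check.dtypes)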
Example #7
class DbNode(Base):
    __tablename__ = "db_dbnode"

    aiida_query = _QueryProperty(_AiidaQuery)

    id = Column(Integer, primary_key=True)
    uuid = Column(UUID(as_uuid=True), default=uuid_func)
    type = Column(String(255), index=True)
    label = Column(
        String(255), index=True, nullable=True,
        default="")  # Does it make sense to be nullable and have a default?
    description = Column(Text(), nullable=True, default="")
    ctime = Column(DateTime(timezone=True), default=timezone.now)
    mtime = Column(DateTime(timezone=True), default=timezone.now)
    nodeversion = Column(Integer, default=1)
    public = Column(Boolean, default=False)
    attributes = Column(JSONB)
    extras = Column(JSONB)

    dbcomputer_id = Column(Integer,
                           ForeignKey('db_dbcomputer.id',
                                      deferrable=True,
                                      initially="DEFERRED",
                                      ondelete="RESTRICT"),
                           nullable=True)

    # This should have the same ondelete behaviour as dbcomputer_id, right?
    user_id = Column(Integer,
                     ForeignKey('db_dbuser.id',
                                deferrable=True,
                                initially="DEFERRED",
                                ondelete="restrict"),
                     nullable=False)

    # TODO SP: The 'passive_deletes=all' argument here means that SQLAlchemy
    # won't take care of automatic deleting in the DbLink table. This still
    # isn't exactly the same behaviour as with Django. The solution to
    # this is probably an ON DELETE inside the DB. On removing a node with
    # id=x, we would remove all links with x as an output.

    ######### RELATIONSHIPS ################

    dbcomputer = relationship('DbComputer',
                              backref=backref('dbnodes',
                                              passive_deletes='all',
                                              cascade='merge'))

    # User
    user = relationship('DbUser',
                        backref=backref(
                            'dbnodes',
                            passive_deletes='all',
                            cascade='merge',
                        ))

    # outputs via db_dblink table
    outputs_q = relationship("DbNode",
                             secondary="db_dblink",
                             primaryjoin="DbNode.id == DbLink.input_id",
                             secondaryjoin="DbNode.id == DbLink.output_id",
                             backref=backref("inputs_q",
                                             passive_deletes=True,
                                             lazy='dynamic'),
                             lazy='dynamic',
                             passive_deletes=True)

    def __init__(self, *args, **kwargs):
        super(DbNode, self).__init__(*args, **kwargs)

        if self.attributes is None:
            self.attributes = dict()

        if self.extras is None:
            self.extras = dict()

    @property
    def outputs(self):
        return self.outputs_q.all()

    @property
    def inputs(self):
        return self.inputs_q.all()

    # XXX repetition between django/sqlalchemy here.
    def get_aiida_class(self):
        """
        Return the corresponding aiida instance of class aiida.orm.Node or an
        appropriate subclass.
        """
        from aiida.common.old_pluginloader import from_type_to_pluginclassname
        from aiida.orm.node import Node
        from aiida.common.pluginloader import load_plugin_safe

        try:
            pluginclassname = from_type_to_pluginclassname(self.type)
        except DbContentError:
            raise DbContentError("The type name of node with pk= {} is "
                                 "not valid: '{}'".format(self.pk, self.type))

        PluginClass = load_plugin_safe(Node, 'aiida.orm', pluginclassname,
                                       self.type, self.pk)

        return PluginClass(dbnode=self)

    def get_simple_name(self, invalid_result=None):
        """
        Return a string with the last part of the type name.

        If the type is empty, use 'Node'.
        If the type is invalid, return the content of the input variable
        ``invalid_result``.

        :param invalid_result: The value to be returned if the node type is
            not recognized.
        """
        thistype = self.type
        # Fix for base class
        if thistype == "":
            thistype = "node.Node."
        if not thistype.endswith("."):
            return invalid_result
        else:
            thistype = thistype[:-1]  # Strip final dot
            return thistype.rpartition('.')[2]

    def set_attr(self, key, value):
        DbNode._set_attr(self.attributes, key, value)
        flag_modified(self, "attributes")
        self.save()

    def set_extra(self, key, value):
        DbNode._set_attr(self.extras, key, value)
        flag_modified(self, "extras")
        self.save()

    def reset_extras(self, new_extras):
        self.extras.clear()
        self.extras.update(new_extras)
        flag_modified(self, "extras")
        self.save()

    def del_attr(self, key):
        DbNode._del_attr(self.attributes, key)
        flag_modified(self, "attributes")
        self.save()

    def del_extra(self, key):
        DbNode._del_attr(self.extras, key)
        flag_modified(self, "extras")
        self.save()

    @staticmethod
    def _set_attr(d, key, value):
        if '.' in key:
            raise ValueError(
                "We don't know how to treat key with dot in it yet")

        d[key] = value

    @staticmethod
    def _del_attr(d, key):
        if '.' in key:
            raise ValueError(
                "We don't know how to treat key with dot in it yet")

        if key not in d:
            raise ValueError("Key {} does not exists".format(key))

        del d[key]

    @property
    def pk(self):
        return self.id

    def __str__(self):
        simplename = self.get_simple_name(invalid_result="Unknown")
        # node pk + type
        if self.label:
            return "{} node [{}]: {}".format(simplename, self.pk, self.label)
        else:
            return "{} node [{}]".format(simplename, self.pk)

    # User email
    @hybrid_property
    def user_email(self):
        """
        Returns: the email of the user
        """
        return self.user.email

    @user_email.expression
    def user_email(cls):
        """
        Returns: the email of the user at a class level (i.e. in the database)
        """
        return select([DbUser.email
                       ]).where(DbUser.id == cls.user_id).label('user_email')

    # Computer name
    @hybrid_property
    def computer_name(self):
        """
        Returns: the name of the computer
        """
        return self.dbcomputer.name

    @computer_name.expression
    def computer_name(cls):
        """
        Returns: the name of the computer at a class level (i.e. in the 
        database)
        """
        return select([
            DbComputer.name
        ]).where(DbComputer.id == cls.dbcomputer_id).label('computer_name')

    @hybrid_property
    def state(self):
        """
        Return the most recent state from DbCalcState
        """
        if not self.id:
            return None
        all_states = DbCalcState.query.filter(
            DbCalcState.dbnode_id == self.id).all()
        if all_states:
            # return max((st.time, st.state) for st in all_states)[1]
            return sort_states(((dbcalcstate.state, dbcalcstate.state.value)
                                for dbcalcstate in all_states),
                               use_key=True)[0]
        else:
            return None

    @state.expression
    def state(cls):
        """
        Return the expression to get the 'latest' state from DbCalcState,
        to be used in queries, where 'latest' is defined using the state order
        defined in _sorted_datastates.
        """
        # Sort first the latest states
        whens = {
            v: idx
            for idx, v in enumerate(_sorted_datastates[::-1], start=1)
        }
        custom_sort_order = case(
            value=DbCalcState.state, whens=whens,
            else_=100)  # else: high value to put it at the bottom

        # Add numerical state to string, to allow to sort them
        states_with_num = select([
            DbCalcState.id.label('id'),
            DbCalcState.dbnode_id.label('dbnode_id'),
            DbCalcState.state.label('state_string'),
            custom_sort_order.label('num_state')
        ]).select_from(DbCalcState).alias()

        # Get the most 'recent' state (using the state ordering, and the min function) for
        # each calc
        calc_state_num = select([
            states_with_num.c.dbnode_id.label('dbnode_id'),
            func.min(states_with_num.c.num_state).label('recent_state')
        ]).group_by(states_with_num.c.dbnode_id).alias()

        # Join the most-recent-state table with the DbCalcState table
        all_states_q = select([
            DbCalcState.dbnode_id.label('dbnode_id'),
            DbCalcState.state.label('state_string'),
            calc_state_num.c.recent_state.label('recent_state'),
            custom_sort_order.label('num_state'),
        ]).select_from(  # DbCalcState).alias().join(
            join(DbCalcState, calc_state_num,
                 DbCalcState.dbnode_id == calc_state_num.c.dbnode_id)).alias()

        # Get the association between each calc and only its corresponding most-recent-state row
        subq = select([
            all_states_q.c.dbnode_id.label('dbnode_id'),
            all_states_q.c.state_string.label('state')
        ]).select_from(all_states_q).where(
            all_states_q.c.num_state == all_states_q.c.recent_state).alias()

        # Final filtering for the actual query
        return select([subq.c.state]). \
            where(
            subq.c.dbnode_id == cls.id,
        ). \
            label('laststate')
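Because user_email, computer_name and state are hybrid properties, the same attribute name works on an instance and inside a query; a sketch, assuming a configured session:

# Instance level: Python-side computation over the DbCalcState rows.
node = session.query(DbNode).first()
print(node.state)

# Class level: the attribute compiles to the correlated subquery built above.
finished = session.query(DbNode).filter(DbNode.state == 'FINISHED').all()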
Example #8
class Workflow(Base):
    """
    A collection of Stages and Tasks encoded as a DAG
    """
    __tablename__ = 'workflow'

    id = Column(Integer, primary_key=True)
    name = Column(VARCHAR(200), unique=True, nullable=False)
    successful = Column(Boolean, nullable=False)
    created_on = Column(DateTime)
    started_on = Column(DateTime)
    finished_on = Column(DateTime)
    max_cores = Column(Integer)
    primary_log_path = Column(String(255))
    _log = None

    info = Column(MutableDict.as_mutable(JSONEncodedDict))
    _status = Column(Enum_ColumnType(WorkflowStatus, length=255),
                     default=WorkflowStatus.no_attempt)
    stages = relationship("Stage",
                          cascade="all, merge, delete-orphan",
                          order_by="Stage.number",
                          passive_deletes=True,
                          backref='workflow')

    exclude_from_dict = ['info']
    dont_garbage_collect = None
    termination_signal = None

    @declared_attr
    def status(cls):
        def get_status(self):
            return self._status

        def set_status(self, value):
            if self._status != value:
                self._status = value
                signal_workflow_status_change.send(self)

        return synonym('_status', descriptor=property(get_status, set_status))

    @validates('name')
    def validate_name(self, key, name):
        assert re.match(r"^[\w-]+$", name), 'Invalid workflow name, characters are limited to letters, numbers, ' \
                                            'hyphens and underscores'
        return name

    @orm.reconstructor
    def constructor(self):
        self.__init__(manual_instantiation=False)

    def __init__(self, manual_instantiation=True, *args, **kwargs):
        # FIXME provide the cosmos_app instance?

        if manual_instantiation:
            raise TypeError(
                'Do not instantiate a Workflow manually.  Use the Cosmos.start method.'
            )
        super(Workflow, self).__init__(*args, **kwargs)
        # assert self.output_dir is not None, 'output_dir cannot be None'
        if self.info is None:
            # mutable dict column defaults to None
            self.info = dict()
        self.jobmanager = None
        if not self.created_on:
            self.created_on = datetime.datetime.now()
        self.dont_garbage_collect = []

    @property
    def log(self):
        if self._log is None:
            self._log = get_logger('%s' % self, self.primary_log_path)
        return self._log

    def make_output_dirs(self):
        """
        Create directory paths of all output files
        """
        dirs = set()

        for task in self.tasks:
            for out_name, v in task.output_map.iteritems():
                dirname = lambda p: p if out_name.endswith(
                    'dir') or p is None else os.path.dirname(p)

                if isinstance(v, (tuple, list)):
                    dirs.update(map(dirname, v))
                elif isinstance(v, dict):
                    raise NotImplementedError()
                else:
                    dirs.add(dirname(v))

        for d in dirs:
            if d is not None and '://' not in d:
                mkdir(d)

    def add_task(self,
                 func,
                 params=None,
                 parents=None,
                 stage_name=None,
                 uid=None,
                 drm=None,
                 queue=None,
                 must_succeed=True,
                 time_req=None,
                 core_req=None,
                 mem_req=None,
                 max_attempts=None,
                 noop=False,
                 job_class=None,
                 drm_options=None):
        """
        Adds a new Task to the Workflow.  If the Task already exists (and was successful), return the successful Task stored in the database

        :param callable func: A function which returns a string which will get converted to a shell script to be executed.  `func` will not get called until
          all of its dependencies have completed.
        :param dict params: Parameters to `func`.  Must be jsonable so that it can be stored in the database.  Any Dependency objects will get resolved into
            a string, and the Dependency.task will be added to this Task's parents.
        :param list[Tasks] parents: A list of dependent Tasks.
        :param str uid: A unique identifier for this Task, primarily used for skipping  previously successful Tasks.
            If a Task with this stage_name and uid already exists in the database (and was successful), the
            database version will be returned and a new one will not be created.
        :param str stage_name: The name of the Stage to add this Task to.  Defaults to `func.__name__`.
        :param str drm: The drm to use for this Task (example 'local', 'ge' or 'drmaa:lsf').  Defaults to the `default_drm` parameter of :meth:`Cosmos.start`
        :param job_class: The name of a job_class to submit to; defaults to the `default_job_class` parameter of :meth:`Cosmos.start`
        :param queue: The name of a queue to submit to; defaults to the `default_queue` parameter of :meth:`Cosmos.start`
        :param bool must_succeed: Default True.  If False, the Workflow will not fail if this Task does not succeed.  Dependent Jobs will not be executed.
        :param bool time_req: The time requirement; will set the Task.time_req attribute which is intended to be used by :func:`get_submit_args` to request resources.
        :param int core_req: Number of cores required for this Task.  Can also be set in the `params` dict or the default value of the Task function signature, but this value takes precedence.
            Warning!  In future versions, this will be the only way to set it.
        :param int mem_req: Number of MB of RAM required for this Task.   Can also be set in the `params` dict or the default value of the Task function signature, but this value takes precedence.
            Warning!  In future versions, this will be the only way to set it.
        :param int max_attempts: The maximum number of times to retry a failed job.  Defaults to the `default_max_attempts` parameter of :meth:`Cosmos.start`
        :rtype: cosmos.api.Task
        """
        # Avoid cyclical import dependencies
        from cosmos.job.drm.DRM_Base import DRM
        from cosmos.models.Stage import Stage
        from cosmos import recursive_resolve_dependency

        # parents
        if parents is None:
            parents = []
        elif isinstance(parents, Task):
            parents = [parents]
        else:
            parents = list(parents)

        # params
        if params is None:
            params = dict()
        for k, v in params.iteritems():
            # decompose `Dependency` objects to values and parents
            new_val, parent_tasks = recursive_resolve_dependency(v)

            params[k] = new_val
            parents.extend(parent_tasks - set(parents))

        # uid
        if uid is None:
            raise AssertionError, 'uid parameter must be specified'
            # Fix me assert params are all JSONable
            # uid = str(params)
        else:
            assert isinstance(uid, basestring), 'uid must be a string'

        if stage_name is None:
            stage_name = str(func.__name__)

        # Get the right Stage
        stage = only_one((s for s in self.stages if s.name == stage_name),
                         None)
        if stage is None:
            stage = Stage(workflow=self,
                          name=stage_name,
                          status=StageStatus.no_attempt)
            self.session.add(stage)

        # Check if task is already in stage
        task = stage.get_task(uid, None)

        if task is not None:
            # if task is already in stage, but unsuccessful, raise an error (duplicate params) since unsuccessful tasks
            # were already removed on workflow load
            if task.successful:
                # If the user manually edited the dag and this is a resume, parents might need to be re-added
                task.parents.extend(set(parents).difference(set(task.parents)))

                for p in parents:
                    if p.stage not in stage.parents:
                        stage.parents.append(p.stage)

                return task
            else:
                # TODO check for duplicate params here?  would be a lot faster at Workflow.run
                raise ValueError(
                    'Duplicate uid, you have added a Task to Stage %s with the uid (unique identifier) `%s` twice.  '
                    'Task uids must be unique within the same Stage.' %
                    (stage_name, uid))
        else:
            # Create Task
            sig = funcsigs.signature(func)

            def params_or_signature_default_or(name, default):
                if name in params:
                    return params[name]
                if name in sig.parameters:
                    param_default = sig.parameters[name].default
                    if param_default is funcsigs._empty:
                        return default
                    else:
                        return param_default
                return default

            input_map = dict()
            output_map = dict()

            for keyword, param in sig.parameters.iteritems():
                if keyword.startswith('in_'):
                    v = params.get(keyword, param.default)
                    assert v != funcsigs._empty, 'parameter %s for %s is required' % (
                        param, func)
                    input_map[keyword] = v
                elif keyword.startswith('out_'):
                    v = params.get(keyword, param.default)
                    assert v != funcsigs._empty, 'parameter %s for %s is required' % (
                        param, func)
                    output_map[keyword] = v

            task = Task(
                stage=stage,
                params=params,
                parents=parents,
                input_map=input_map,
                output_map=output_map,
                uid=uid,
                drm=drm if drm is not None else self.cosmos_app.default_drm,
                job_class=job_class if job_class is not None else
                self.cosmos_app.default_job_class,
                queue=queue
                if queue is not None else self.cosmos_app.default_queue,
                must_succeed=must_succeed,
                core_req=core_req if core_req is not None else
                params_or_signature_default_or('core_req', 1),
                mem_req=mem_req if mem_req is not None else
                params_or_signature_default_or('mem_req', None),
                time_req=time_req
                if time_req is not None else self.cosmos_app.default_time_req,
                successful=False,
                max_attempts=max_attempts if max_attempts is not None else
                self.cosmos_app.default_max_attempts,
                attempt=1,
                NOOP=noop)

            task.cmd_fxn = func

            task.drm_options = drm_options if drm_options is not None else self.cosmos_app.default_drm_options
            DRM.validate_drm_options(task.drm, task.drm_options)

        # Add Stage Dependencies
        for p in parents:
            if p.stage not in stage.parents:
                stage.parents.append(p.stage)

        self.dont_garbage_collect.append(task)

        return task

    def run(self,
            max_cores=None,
            dry=False,
            set_successful=True,
            cmd_wrapper=signature.default_cmd_fxn_wrapper,
            log_out_dir_func=default_task_log_output_dir):
        """
        Runs this Workflow's DAG

        :param int max_cores: The maximum number of cores to use at once.  A value of None indicates no maximum.
        :param int max_attempts: The maximum number of times to retry a failed job.
             Can be overridden on a per-Task basis with Workflow.add_task(..., max_attempts=N, ...)
        :param callable log_out_dir_func: A function that returns a Task's logging directory (must be unique).
             It receives one parameter: the Task instance.
             By default a Task's log output is stored in log/stage_name/task_id.
             See _default_task_log_output_dir for more info.
        :param callable cmd_wrapper: A decorator which will be applied to every Task's cmd_fxn.
        :param bool dry: If True, do not actually run any jobs.
        :param bool set_successful: Sets this workflow as successful if all tasks finish without a failure.  You might set this to False if you intend to add and
            run more tasks in this workflow later.

        Returns True if all tasks in the workflow ran successfully, False otherwise.
        If dry is specified, returns None.
        """
        try:
            assert os.path.exists(os.getcwd(
            )), 'current working dir does not exist! %s' % os.getcwd()

            assert hasattr(
                self, 'cosmos_app'
            ), 'Workflow was not initialized using the Workflow.start method'
            assert hasattr(log_out_dir_func,
                           '__call__'), 'log_out_dir_func must be a function'
            assert self.session, 'Workflow must be part of a sqlalchemy session'

            session = self.session
            self.log.info('Preparing to run %s using DRM `%s`, cwd is `%s`' %
                          (self, self.cosmos_app.default_drm, os.getcwd()))
            self.log.info('Running as %s@%s, pid %s' %
                          (getpass.getuser(), os.uname()[1], os.getpid()))

            self.max_cores = max_cores

            from ..job.JobManager import JobManager

            if self.jobmanager is None:
                self.jobmanager = JobManager(
                    get_submit_args=self.cosmos_app.get_submit_args,
                    cmd_wrapper=cmd_wrapper,
                    log_out_dir_func=log_out_dir_func)

            self.status = WorkflowStatus.running
            self.successful = False

            if self.started_on is None:
                self.started_on = datetime.datetime.now()

            task_graph = self.task_graph()
            stage_graph = self.stage_graph()

            assert len(set(self.stages)) == len(
                self.stages), 'duplicate stage name detected: %s' % (next(
                    duplicates(self.stages)))

            # renumber stages
            stage_graph_no_cycles = nx.DiGraph()
            stage_graph_no_cycles.add_nodes_from(stage_graph.nodes())
            stage_graph_no_cycles.add_edges_from(stage_graph.edges())
            for cycle in nx.simple_cycles(stage_graph):
                stage_graph_no_cycles.remove_edge(cycle[-1], cycle[0])
            for i, s in enumerate(topological_sort(stage_graph_no_cycles)):
                s.number = i + 1
                if s.status != StageStatus.successful:
                    s.status = StageStatus.no_attempt

            # Make sure everything is in the sqlalchemy session
            session.add(self)
            successful = filter(lambda t: t.successful, task_graph.nodes())

            # print stages
            for s in sorted(self.stages, key=lambda s: s.number):
                self.log.info('%s %s' % (s, s.status))

            # Create Task Queue
            task_queue = _copy_graph(task_graph)
            self.log.info('Skipping %s successful tasks...' % len(successful))
            task_queue.remove_nodes_from(successful)

            handle_exits(self)

            if self.max_cores is not None:
                self.log.info('Ensuring there are enough cores...')
                # make sure we've got enough cores
                for t in task_queue:
                    assert int(
                        t.core_req
                    ) <= self.max_cores, '%s requires more cpus (%s) than `max_cores` (%s)' % (
                        t, t.core_req, self.max_cores)

            # Run this thing!
            self.log.info('Committing to SQL db...')
            session.commit()
            if not dry:
                _run(self, session, task_queue)

                # set status
                if self.status == WorkflowStatus.failed_but_running:
                    self.status = WorkflowStatus.failed
                    # set stage status to failed
                    for s in self.stages:
                        if s.status == StageStatus.running_but_failed:
                            s.status = StageStatus.failed
                    session.commit()
                    return False
                elif self.status == WorkflowStatus.running:
                    if set_successful:
                        self.status = WorkflowStatus.successful
                    session.commit()
                    return True
                else:
                    self.log.warning('%s exited with status "%s"', self,
                                     self.status)
                    session.commit()
                    return False
            else:
                self.log.info('Workflow dry run is complete')
                return None
        except Exception as ex:
            self.log.fatal(ex, exc_info=True)
            raise

    def terminate(self, due_to_failure=True):
        self.log.warning('Terminating %s!' % self)
        if self.jobmanager:
            self.log.info(
                'Processing finished tasks and terminating {num_running_tasks} running tasks'
                .format(num_running_tasks=len(
                    self.jobmanager.running_tasks), ))
            _process_finished_tasks(self.jobmanager)
            self.jobmanager.terminate()

        if due_to_failure:
            self.status = WorkflowStatus.failed
        else:
            self.status = WorkflowStatus.killed

        self.session.commit()

    def cleanup(self):
        if self.jobmanager:
            self.log.info('Cleaning up {num_dead_tasks} dead tasks'.format(
                num_dead_tasks=len(self.jobmanager.dead_tasks), ))
            self.jobmanager.cleanup()

    @property
    def tasks(self):
        return [t for s in self.stages for t in s.tasks]
        # return session.query(Task).join(Stage).filter(Stage.workflow == ex).all()

    def stage_graph(self):
        """
        :return: (networkx.DiGraph) a DAG of the stages
        """
        g = nx.DiGraph()
        g.add_nodes_from(self.stages)
        g.add_edges_from((s, c) for s in self.stages for c in s.children if c)
        return g

    def task_graph(self):
        """
        :return: (networkx.DiGraph) a DAG of the tasks
        """
        g = nx.DiGraph()
        g.add_nodes_from(self.tasks)
        g.add_edges_from([(t, c) for t in self.tasks for c in t.children])
        return g

    def get_stage(self, name_or_id):
        if isinstance(name_or_id, int):
            f = lambda s: s.id == name_or_id
        else:
            f = lambda s: s.name == name_or_id

        for stage in self.stages:
            if f(stage):
                return stage

        raise ValueError('Stage with name %s does not exist' % name_or_id)

    @property
    def url(self):
        return url_for('cosmos.workflow', name=self.name)

    def __repr__(self):
        return '<Workflow[%s] %s>' % (self.id or '', self.name)

    def __unicode__(self):
        return self.__repr__()

    def delete(self, delete_files=False):
        """
        :param delete_files: (bool) If True, delete :attr:`output_dir` directory and all contents on the filesystem
        """
        if hasattr(self, 'log'):
            self.log.info('Deleting %s, delete_files=%s' %
                          (self, delete_files))
            for h in self.log.handlers:
                h.flush()
                h.close()
                self.log.removeHandler(h)

        if delete_files:
            raise NotImplementedError(
                'This should delete all Task.output_files')

        print >> sys.stderr, '%s Deleting from SQL...' % self
        self.session.delete(self)
        self.session.commit()
        print >> sys.stderr, '%s Deleted' % self

    def get_first_failed_task(self, key=lambda t: t.finished_on):
        """
        Return the first failed Task (chronologically).

        If no Task failed, return None.
        """
        for t in sorted([t for t in self.tasks if key(t) is not None],
                        key=key):
            if t.exit_status:
                return t
        return None
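A minimal end-to-end sketch of the add_task/run API documented above, assuming a configured Cosmos application (the docstrings reference Cosmos.start); the database URL and the command function are illustrative:

from cosmos.api import Cosmos

def echo(out_txt, word='hello'):
    # The task function returns the shell command to be executed.
    return 'echo %s > %s' % (word, out_txt)

cosmos = Cosmos('sqlite:///cosmos.sqlite')  # illustrative URL
cosmos.initdb()
workflow = cosmos.start('example_workflow')

workflow.add_task(func=echo, params=dict(out_txt='out.txt'), uid='echo_hello')
success = workflow.run(max_cores=1)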
Example #9
def downgrade():
    op.alter_column("todo", "todo_description", type_=String(256))
    op.alter_column("achievement", "ac_description", type_=String(512))
Example #10
class Regiao(db.Model):
    id = Column(Integer, primary_key=True)
    nome = Column(String(255))

    def __repr__(self):
        return self.nome
Example #11
class Swat(DatabaseObject):
    '''
    Holds the bribe history of players that get 'SWAT'd
    '''

    uuid = Column(String(36),
                  unique=True,
                  nullable=False,
                  default=lambda: str(uuid4()))
    user_id = Column(Integer, ForeignKey('user.id'), nullable=False)
    target_id = Column(Integer, ForeignKey('user.id'), nullable=False)
    paid = Column(Integer, nullable=False)
    accepted = Column(Boolean, default=False)
    completed = Column(Boolean, default=False)

    @classmethod
    def all(cls):
        ''' Returns a list of all objects in the database '''
        return dbsession.query(cls).all()

    @classmethod
    def all_pending(cls):
        return dbsession.query(cls).filter(
            and_(cls.accepted == False,
                 cls.completed == False)).order_by(desc(cls.created)).all()

    @classmethod
    def all_in_progress(cls):
        return dbsession.query(cls).filter(
            and_(cls.accepted == True,
                 cls.completed == False)).order_by(desc(cls.created)).all()

    @classmethod
    def all_completed(cls):
        return dbsession.query(cls).filter_by(completed=True).order_by(
            desc(cls.created)).all()

    @classmethod
    def pending_by_target_id(cls, uid):
        return dbsession.query(cls).filter_by(completed=False).filter(
            and_(cls.accepted == False, cls.target_id == uid)).all()

    @classmethod
    def in_progress_by_target_id(cls, uid):
        return dbsession.query(cls).filter(
            and_(cls.accepted == True,
                 cls.completed == False)).filter_by(target_id=uid).all()

    @classmethod
    def by_id(cls, ident):
        ''' Return the object whose id is ident '''
        return dbsession.query(cls).filter_by(id=ident).first()

    @classmethod
    def by_uuid(cls, uuid):
        ''' Return the object with the given uuid '''
        return dbsession.query(cls).filter_by(uuid=uuid).first()

    @classmethod
    def by_user_id(cls, uid):
        ''' Return all objects based on user id '''
        return dbsession.query(cls).filter_by(user_id=uid).all()

    @classmethod
    def by_target_id(cls, uid):
        ''' Return all objects based on target id '''
        return dbsession.query(cls).filter_by(target_id=uid).all()

    @classmethod
    def count_completed_by_target_id(cls, uid):
        ''' Return the number of completed bribes in database '''
        return dbsession.query(cls).filter(
            and_(cls.completed == True, cls.target_id == uid)).count()

    @classmethod
    def ordered(cls):
        ''' Return all bribes in chronological order '''
        return dbsession.query(cls).order_by(desc(cls.created)).all()

    @classmethod
    def ordered_by_user_id(cls, uid):
        ''' Return all bribes for user id in chronological order '''
        return dbsession.query(cls).filter_by(user_id=uid).order_by(
            desc(cls.created)).all()

    @classmethod
    def ordered_by_target_id(cls, uid):
        ''' Return all bribes for target id in chronological order '''
        return dbsession.query(cls).filter_by(target_id=uid).order_by(
            desc(cls.created)).all()

    @classmethod
    def get_price(cls, user):
        ''' Calculate price of next bribe based on history '''
        config = ConfigManager.instance()
        base_price = config.bribe_cost
        return base_price + (cls.count_completed_by_target_id(user.id) *
                             base_price)

    @classmethod
    def user_is_pending(cls, user):
        ''' Return True if there are any pending bribes for the user in the database '''
        return 0 < len(cls.pending_by_target_id(user.id))

    @classmethod
    def user_is_in_progress(cls, user):
        ''' Return True if the user has a bribe in progress '''
        return 0 < len(cls.in_progress_by_target_id(user.id))

    @property
    def user(self):
        return User.by_id(self.user_id)

    @property
    def target(self):
        return User.by_id(self.target_id)

    def is_pending(self):
        return not self.accepted and not self.completed

    def is_in_progress(self):
        return self.accepted and not self.completed

    def is_declined(self):
        return not self.accepted and self.completed

    def is_successful(self):
        return self.accepted and self.completed

    def __repr__(self):
        return '<SWAT user_id: %d, target_id: %d>' % (
            self.user_id,
            self.target_id,
        )
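A hypothetical pricing check built from the classmethods above; user is assumed to be a stored User instance:

# The price grows linearly with the number of completed bribes against the target.
if not Swat.user_is_pending(user) and not Swat.user_is_in_progress(user):
    price = Swat.get_price(user)
    print('Next bribe against user %d costs %d' % (user.id, price))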
Example #12
def import_tushare_margin(chain_param=None):
    """
    Insert daily stock margin data up to the most recent working day minus one.
    If it is past BASE_LINE_HOUR, fetch the current day's data as well.
    :return:
    """
    table_name = 'tushare_stock_margin'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('trade_date', Date),
        ('exchange_id', String(20)),
        ('rzye', DOUBLE),
        ('rzmre', DOUBLE),
        ('rzche', DOUBLE),
        ('rqye', DOUBLE),
        ('rqmcl', DOUBLE),
        ('rzrqye', DOUBLE),
    ]

    has_table = engine_md.has_table(table_name)
    # check the table to determine whether tushare_daily_basic already exists

    if has_table:
        sql_str = """
                     select cal_date            
                     FROM
                      (
                       select * from tushare_trade_date trddate 
                       where( cal_date>(SELECT max(trade_date) FROM  {table_name}))
                     )tt
                     where (is_open=1 
                            and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                            and exchange_id='SSE') """.format(
            table_name=table_name)
    else:
        sql_str = """
                     SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 
                  AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) 
                  AND exchange_id='SSE'  AND cal_date>='2010-03-31') ORDER BY cal_date"""
        logger.warning('%s does not exist; computing the date range from the tushare_trade_date table only', table_name)

    with with_db_session(engine_md) as session:
        # fetch the trading-day data
        table = session.execute(sql_str)
        trddate = list(row[0] for row in table.fetchall())
    # build the dtype mapping
    dtype = {key: val for key, val in param_list}

    try:
        for i in range(len(trddate)):
            trade_date = datetime_2_str(trddate[i], STR_FORMAT_DATE_TS)
            for exchange_id in ['SSE', 'SZSE']:
                data_df = invoke_margin(trade_date=trade_date,
                                        exchange_id=exchange_id)
                if len(data_df) > 0:
                    data_count = bunch_insert_on_duplicate_update(
                        data_df, table_name, engine_md, dtype)
                    logging.info("%s更新 %s %s 结束 %d 条信息被更新", trade_date,
                                 table_name, exchange_id, data_count)
                else:
                    logging.info("无数据信息可被更新")
    finally:
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # build_primary_key([table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `trade_date` `trade_date` VARCHAR(20) NOT NULL FIRST,
                ADD PRIMARY KEY (`trade_date`)""".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('primary key on `trade_date` set for table %s', table_name)
Example #13
class JobStatus(Base):
    __tablename__ = 'job_status'
    id = Column(Integer, primary_key=True)
    name = Column(String(length=50), nullable=False)
    desc = Column(String(length=200))
Example #14
class Trigger(Base):
    __tablename__ = 'triggers'

    id = Column(Integer, primary_key=True)
    spider_id = Column(Integer, ForeignKey('spiders.id'))
    cron_pattern = Column(String(length=50))
Example #15
class Project(Base):
    __tablename__ = 'projects'

    id = Column(Integer, primary_key=True)
    name = Column(String(length=50))
    version = Column(String(length=50))
Example #16
def register_tables(metadata: MetaData) -> ModelMap:
    return ModelMap(
        post=ModelHelper(
            table=Table(
                "post",
                metadata,
                Column("id", Integer, primary_key=True, autoincrement=True),
                Column("title", String(150), nullable=False),
                Column("author_id", String(32), nullable=False, index=True),
                Column("content", Text),
                Column("created",
                       TIMESTAMP,
                       nullable=False,
                       server_default=func.now()),
                Column(
                    "updated",
                    TIMESTAMP,
                    nullable=False,
                    server_default=text(
                        "CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP"),
                ),
                mysql_engine="InnoDB",
                mysql_charset="utf8mb4",
            ),
            author_key="author_id",
            engine=metadata.bind,
        ),
        comment=ModelHelper(
            table=Table(
                "comment",
                metadata,
                Column("id", Integer, primary_key=True, autoincrement=True),
                Column(
                    "post_id",
                    Integer,
                    ForeignKey("post.id", ondelete="CASCADE"),
                    nullable=False,
                    index=True,
                ),
                Column("author_id", String(32), nullable=False),
                Column("content", Text),
                Column("created",
                       TIMESTAMP,
                       nullable=False,
                       server_default=func.now()),
                Column(
                    "updated",
                    TIMESTAMP,
                    nullable=False,
                    server_default=text(
                        "CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP"),
                ),
                mysql_engine="InnoDB",
                mysql_charset="utf8mb4",
            ),
            author_key="author_id",
            engine=metadata.bind,
        ),
        reaction=ModelHelper(
            table=Table(
                "reaction",
                metadata,
                Column("id", Integer, primary_key=True, autoincrement=True),
                Column(
                    "comment_id",
                    Integer,
                    ForeignKey("comment.id", ondelete="CASCADE"),
                    nullable=False,
                    index=True,
                ),
                Column("author_id", String(32), nullable=False),
                Column("reaction_type", Enum(ReactionType), nullable=False),
                Column(
                    "updated",
                    TIMESTAMP,
                    nullable=False,
                    server_default=text(
                        "CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP"),
                ),
                UniqueConstraint("comment_id", "author_id"),
                mysql_engine="InnoDB",
                mysql_charset="utf8mb4",
            ),
            author_key="author_id",
            engine=metadata.bind,
        ),
    )
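A usage sketch for the factory above; it assumes SQLAlchemy 1.x, where a MetaData can be bound to an engine (register_tables reads metadata.bind for each helper):

from sqlalchemy import MetaData, create_engine

engine = create_engine('mysql+pymysql://user:pass@localhost/blog')  # illustrative URL
metadata = MetaData(bind=engine)

models = register_tables(metadata)
metadata.create_all()  # emits CREATE TABLE for post, comment and reaction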
Example #17
class JSONType(PickleType):
    impl = String(191)

    def __init__(self, pickler=JSONPickle, **kwargs):
        super().__init__(pickler=pickler, **kwargs)
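A minimal column sketch for the type above; Base, Column and Integer are assumed to be imported in the surrounding module, and JSONPickle to expose json-style dumps/loads:

# Stores the JSON-pickled payload in a VARCHAR(191) column.
class Setting(Base):
    __tablename__ = 'settings'
    id = Column(Integer, primary_key=True)
    payload = Column(JSONType, default=dict)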
Example #18
class RouteStop(Base):
    datasource = config.DATASOURCE_DERIVED

    __tablename__ = 'route_stops'

    route_id = Column(String(255),
                      primary_key=True,
                      index=True,
                      nullable=False)
    direction_id = Column(Integer,
                          primary_key=True,
                          index=True,
                          nullable=False)
    stop_id = Column(String(255), primary_key=True, index=True, nullable=False)
    order = Column(Integer, index=True, nullable=False)
    start_date = Column(Date, index=True, nullable=False)
    end_date = Column(Date, index=True, nullable=False)

    route = relationship('Route',
                         primaryjoin='RouteStop.route_id==Route.route_id',
                         foreign_keys='(RouteStop.route_id)',
                         uselist=False,
                         viewonly=True,
                         lazy='joined')

    stop = relationship('Stop',
                        primaryjoin='RouteStop.stop_id==Stop.stop_id',
                        foreign_keys='(RouteStop.stop_id)',
                        uselist=False,
                        viewonly=True,
                        lazy='joined')

    direction = relationship(
        'RouteDirection',
        # note: string primaryjoins are eval'd, so use and_() rather than the
        # Python 'and' keyword, which would silently drop one clause
        primaryjoin=
        'and_(RouteStop.route_id==RouteDirection.route_id, '
        'RouteStop.direction_id==RouteDirection.direction_id)',
        foreign_keys='(RouteStop.route_id, RouteStop.direction_id)',
        uselist=False,
        viewonly=True,
        lazy='joined')

    start_calendar = relationship(
        'UniversalCalendar',
        primaryjoin='RouteStop.start_date==UniversalCalendar.date',
        foreign_keys='(RouteStop.start_date)',
        uselist=True,
        viewonly=True)

    end_calendar = relationship(
        'UniversalCalendar',
        primaryjoin='RouteStop.end_date==UniversalCalendar.date',
        foreign_keys='(RouteStop.end_date)',
        uselist=True,
        viewonly=True)

    def is_active(self, date=None):
        """ :return False whenever the route_stop's start and end dates fall
                    outside the input date (where the input date defaults to 'today')
        """
        _is_active = False
        if self.start_date and self.end_date:
            if date is None:
                date = datetime.date.today()
            if self.start_date <= date <= self.end_date:
                _is_active = True
        return _is_active

    @classmethod
    def is_stop_active(cls, session, stop_id, agency_id=None, date=None):
        ''' returns boolean whether given stop id is active for a given date
        '''
        ret_val = False

        # step 1: default date
        if date is None or not isinstance(date, datetime.date):
            date = datetime.date.today()

        # step 2: get RouteStop object
        rs = RouteStop.query_by_stop(session, stop_id, agency_id, date, 1)
        if rs and len(rs) > 0:
            ret_val = True
        return ret_val

    @classmethod
    def query_by_stop(cls,
                      session,
                      stop_id,
                      agency_id=None,
                      date=None,
                      count=None,
                      sort=False):
        ''' get all route stop records by looking for a given stop_id.
            further filtering can be had by providing an active date and agency id
        '''
        # step 1: query all route stops by stop id (and maybe agency)
        q = session.query(RouteStop).filter(RouteStop.stop_id == stop_id)
        if agency_id is not None:
            q = q.filter(RouteStop.agency_id == agency_id)

        # step 2: filter based on date
        if date:
            q = q.filter(RouteStop.start_date <= date).filter(
                date <= RouteStop.end_date)

        # step 3: limit the number of objects returned by query
        if count:
            q = q.limit(count)

        # step 4: sort the results based on order column
        if sort:
            q = q.order_by(RouteStop.order)

        ret_val = q.all()
        return ret_val

    @classmethod
    def unique_routes_at_stop(cls,
                              session,
                              stop_id,
                              agency_id=None,
                              date=None,
                              route_name_filter=False):
        ''' get a unique set of route records by looking for a given stop_id.
            further filtering can be had by providing an active date and agency id, and route name
        '''
        ret_val = []

        route_ids = []
        route_names = []

        route_stops = RouteStop.query_by_stop(session,
                                              stop_id,
                                              agency_id,
                                              date,
                                              sort=True)
        for rs in route_stops:
            # step 1: filter(s) check
            if rs.route_id in route_ids:
                continue
            if route_name_filter and rs.route.route_name in route_names:
                continue
            route_ids.append(rs.route_id)
            route_names.append(rs.route.route_name)

            # step 2: append route object to results
            ret_val.append(rs.route)
        return ret_val

    @classmethod
    def active_unique_routes_at_stop(cls,
                                     session,
                                     stop_id,
                                     agency_id=None,
                                     date=None,
                                     route_name_filter=False):
        ''' to filter active routes, just provide a date to the above unique_routes_at_stop method
        '''
        # make sure date is not null...
        if date is None or not isinstance(date, datetime.date):
            date = datetime.date.today()
        return cls.unique_routes_at_stop(session, stop_id, agency_id, date,
                                         route_name_filter)

    @classmethod
    def active_stops(cls,
                     session,
                     route_id,
                     direction_id=None,
                     agency_id=None,
                     date=None):
        ''' returns the list of route stops that are seen as 'active' based on dates and filters
        '''

        # step 1: default date
        if date is None or not isinstance(date, datetime.date):
            date = datetime.date.today()

        # step 2a: query all route stops by route (and maybe direction and agency)
        q = session.query(RouteStop).filter(RouteStop.route_id == route_id)
        if direction_id is not None:
            q = q.filter(RouteStop.direction_id == direction_id)
        if agency_id is not None:
            q = q.filter(RouteStop.agency_id == agency_id)

        # step 2b: filter based on date
        q = q.filter(RouteStop.start_date <= date).filter(
            date <= RouteStop.end_date)

        # step 2c: add some stop order
        q = q.order_by(RouteStop.order)

        route_stops = q.all()
        return route_stops

    @classmethod
    def load(cls, db, **kwargs):
        log.debug('{0}.load (loaded later in post_process)'.format(
            cls.__name__))
        pass

    @classmethod
    def post_process(cls, db, **kwargs):
        log.debug('{0}.post_process'.format(cls.__name__))
        cls.populate(db.session)

    @classmethod
    def populate(cls, session):
        ''' for each route/direction, find list of stop_ids for route/direction pairs

            the load is a two part process, where part A finds a list of unique stop ids, and
            part B creates the RouteStop (and potentially RouteDirections ... if not in GTFS) records
        '''
        from gtfsdb import Route, RouteDirection

        start_time = time.time()
        routes = session.query(Route).all()

        for r in routes:
            # step 0: figure out some info about the route
            create_directions = False
            if r.directions is None or len(r.directions) == 0:
                create_directions = True

            # step 1a: filter the list of trips down to only a trip with a unique pattern
            trips = []
            shape_id_filter = []
            for t in r.trips:
                # a bit of a speedup to filter trips that have the same shape
                if t.shape_id and t.shape_id in shape_id_filter:
                    continue
                # store our trips
                shape_id_filter.append(t.shape_id)
                trips.append(t)

            # step 1b: sort our list of trips by length (note: for trips with two directions, ...)
            trips = sorted(trips, key=lambda t: t.trip_len, reverse=True)

            # step 2: get a hash table of route stops with effective start and end dates
            stop_effective_dates = cls._find_route_stop_effective_dates(
                session, r.route_id)

            # PART A: we're going to just collect a list of unique stop ids for this route / directions
            for d in [0, 1]:
                unique_stops = []

                # step 3: loop through all our trips and their stop times, pulling out a unique set of stops
                for t in trips:
                    if t.direction_id == d:

                        # step 4: loop through this trip's stop times, and find any/all stops already in our stop list;
                        #         also try to find the best position for each new stop (e.g., look for where the stop pattern breaks)
                        last_pos = None
                        for i, st in enumerate(t.stop_times):
                            # step 5a: make sure this is a stop that customers can actually board...
                            if st.is_boarding_stop():
                                if st.stop_id in unique_stops:
                                    last_pos = unique_stops.index(st.stop_id)
                                else:
                                    # step 5b: add this stop id to our unique list ... either in position, or appended to the end of the list
                                    # (compare against None, since a valid position of 0 is falsy)
                                    if last_pos is not None:
                                        last_pos += 1
                                        unique_stops.insert(
                                            last_pos, st.stop_id)
                                    else:
                                        unique_stops.append(st.stop_id)

                # PART B: add records to the database ...
                if len(unique_stops) > 0:

                    # step 6: if an entry for the direction doesn't exist, create a new
                    #         RouteDirection record and add it to this route
                    if create_directions:
                        rd = RouteDirection()
                        rd.route_id = r.route_id
                        rd.direction_id = d
                        rd.direction_name = "Outbound" if d == 0 else "Inbound"
                        session.add(rd)

                    # step 7: create new RouteStop records
                    for k, stop_id in enumerate(unique_stops):
                        # create a new RouteStop record for this stop
                        rs = RouteStop()
                        rs.route_id = r.route_id
                        rs.direction_id = d
                        rs.stop_id = stop_id
                        rs.order = k + 1
                        rs.start_date = stop_effective_dates[stop_id][1]
                        rs.end_date = stop_effective_dates[stop_id][2]
                        session.add(rs)

            # step 8: commit the new records to the db for this route...
            sys.stdout.write('*')
            session.commit()

        # step 9: final commit for any stragglers
        session.commit()
        session.flush()
        session.close()

        processing_time = time.time() - start_time
        log.debug('{0}.post_process ({1:.0f} seconds)'.format(
            cls.__name__, processing_time))

    @classmethod
    def _find_route_stop_effective_dates(cls, session, route_id):
        ''' find effective start date and end date for all stops of the input route, when
            queried against the trip and stop time tables.  Below are a couple of pure SQL queries that
            perform what I'm doing to get said start and end dates:

            # query all route stops with start & end dates
            SELECT t.route_id, st.stop_id, min(date), max(date)
            FROM ott.universal_calendar u, ott.trips t, ott.stop_times st
            WHERE t.service_id = u.service_id
              AND t.trip_id    = st.trip_id
            GROUP BY t.route_id, st.stop_id

            # query all stops start & end dates for a given route (used below in SQLAlchemy)
            SELECT st.stop_id, min(date), max(date)
            FROM ott.universal_calendar u, ott.trips t, ott.stop_times st
            WHERE t.service_id = u.service_id
              AND t.trip_id    = st.trip_id
              AND st.stop_id   = '1'
            GROUP BY st.stop_id

            :return: hash table with stop_id as key, and a (stop_id, start_date, end_date) tuple for each route stop
        '''
        ret_val = {}

        # step 1: query the route/stop start and end dates, based on stop time table
        from gtfsdb import UniversalCalendar, StopTime, Trip
        q = session.query(StopTime.stop_id, func.min(UniversalCalendar.date),
                          func.max(UniversalCalendar.date))
        q = q.filter(UniversalCalendar.service_id == Trip.service_id)
        q = q.filter(Trip.trip_id == StopTime.trip_id)
        q = q.filter(Trip.route_id == route_id)
        q = q.group_by(StopTime.stop_id)
        stop_dates = q.all()

        # step 2: make a hash of these dates with the stop id as the key
        for d in stop_dates:
            ret_val[d[0]] = d

        return ret_val
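
A minimal usage sketch of the query helpers above, assuming a database already loaded by gtfsdb and a plain SQLAlchemy session; the connection string and route id are hypothetical:

import datetime

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from gtfsdb import RouteStop

# hypothetical connection string; point this at a gtfsdb-loaded database
engine = create_engine('postgresql://localhost/gtfs')
session = sessionmaker(bind=engine)()

# stops active today on a (hypothetical) route '100', outbound direction
for rs in RouteStop.active_stops(session, route_id='100', direction_id=0,
                                 date=datetime.date.today()):
    print(rs.order, rs.stop_id)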
Beispiel #19
0
class DbNode(Base):
    __tablename__ = "db_dbnode"
    id = Column(Integer, primary_key=True)
    uuid = Column(UUID(as_uuid=True), default=uuid_func)
    type = Column(String(255), index=True)
    label = Column(String(255), index=True, nullable=True)
    description = Column(Text(), nullable=True)
    ctime = Column(DateTime(timezone=True), default=timezone.now)
    mtime = Column(DateTime(timezone=True), default=timezone.now)
    dbcomputer_id = Column(Integer,
                           ForeignKey('db_dbcomputer.id',
                                      deferrable=True,
                                      initially="DEFERRED"),
                           nullable=True)
    dbcomputer = relationship('DbComputer',
                              backref=backref('dbnodes', passive_deletes=True))
    user_id = Column(Integer,
                     ForeignKey('db_dbuser.id',
                                deferrable=True,
                                initially="DEFERRED"),
                     nullable=False)
    user = relationship('DbUser', backref='dbnodes')

    public = Column(Boolean, default=False)

    nodeversion = Column(Integer, default=1)

    attributes = relationship('DbAttribute', uselist=True, backref='dbnode')
    extras = relationship('DbExtra', uselist=True, backref='dbnode')

    outputs = relationship("DbNode",
                           secondary="db_dblink",
                           primaryjoin="DbNode.id == DbLink.input_id",
                           secondaryjoin="DbNode.id == DbLink.output_id",
                           backref=backref("inputs", passive_deletes=True),
                           passive_deletes=True)

    children = relationship("DbNode",
                            secondary="db_dbpath",
                            primaryjoin="DbNode.id == DbPath.parent_id",
                            secondaryjoin="DbNode.id == DbPath.child_id",
                            backref="parents")

    def get_aiida_class(self):
        """
        Return the corresponding instance of
        :func:`~aiida.orm.implementation.django.node.Node`
        or a subclass returned by the plugin loader.

        .. todo::
            The behavior is quite pathetic, creating a django DbNode instance
            to instantiate the aiida instance.
            This means that every time you load Aiida instances with
            the QueryBuilder when using Django as a backend, three instances
            are instantiated for every Aiida instance you load!
            Could be fixed by allowing DbNode from the dummy model to be passed
            to AiidaNode's __init__.

        :returns: An instance of the plugin class
        """
        # I need to import the DbNode in the Django model,
        # and instantiate an object that has the same attributes as self.
        from aiida.backends.djsite.db.models import DbNode as DjangoSchemaDbNode
        dbnode = DjangoSchemaDbNode(id=self.id,
                                    type=self.type,
                                    uuid=self.uuid,
                                    ctime=self.ctime,
                                    mtime=self.mtime,
                                    label=self.label,
                                    dbcomputer_id=self.dbcomputer_id,
                                    user_id=self.user_id,
                                    public=self.public,
                                    nodeversion=self.nodeversion)
        return dbnode.get_aiida_class()

    @hybrid_property
    def state(self):
        """
        Return the most recent state from DbCalcState
        """
        if not self.id:
            return None
        all_states = DbCalcState.query.filter(
            DbCalcState.dbnode_id == self.id).all()
        if all_states:
            #return max((st.time, st.state) for st in all_states)[1]
            return sort_states(((dbcalcstate.state, dbcalcstate.state.value)
                                for dbcalcstate in all_states),
                               use_key=True)[0]
        else:
            return None

    @state.expression
    def state(cls):
        """
        Return the expression to get the 'latest' state from DbCalcState,
        to be used in queries, where 'latest' is defined using the state order
        defined in _sorted_datastates.
        """
        # Sort the latest states first
        whens = {
            v: idx
            for idx, v in enumerate(_sorted_datastates[::-1], start=1)
        }
        custom_sort_order = case(
            value=DbCalcState.state, whens=whens,
            else_=100)  # else: high value to put it at the bottom

        # Attach a numeric sort key to each state string so the states can be ordered
        states_with_num = select([
            DbCalcState.id.label('id'),
            DbCalcState.dbnode_id.label('dbnode_id'),
            DbCalcState.state.label('state_string'),
            custom_sort_order.label('num_state')
        ]).select_from(DbCalcState).alias()

        # Get the most 'recent' state (using the state ordering, and the min function) for
        # each calc
        calc_state_num = select([
            states_with_num.c.dbnode_id.label('dbnode_id'),
            func.min(states_with_num.c.num_state).label('recent_state')
        ]).group_by(states_with_num.c.dbnode_id).alias()

        # Join the most-recent-state table with the DbCalcState table
        all_states_q = select([
            DbCalcState.dbnode_id.label('dbnode_id'),
            DbCalcState.state.label('state_string'),
            calc_state_num.c.recent_state.label('recent_state'),
            custom_sort_order.label('num_state'),
        ]).select_from(
            join(DbCalcState, calc_state_num,
                 DbCalcState.dbnode_id == calc_state_num.c.dbnode_id)).alias()

        # Get the association between each calc and only its corresponding most-recent-state row
        subq = select([
            all_states_q.c.dbnode_id.label('dbnode_id'),
            all_states_q.c.state_string.label('state')
        ]).select_from(all_states_q).where(
            all_states_q.c.num_state == all_states_q.c.recent_state).alias()

        # Final filtering for the actual query
        return select([subq.c.state]).\
            where(
                    subq.c.dbnode_id == cls.id,
                ).\
            label('laststate')
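
A short usage sketch of the hybrid property above; here, session is assumed to be a SQLAlchemy session bound to the AiiDA database, and 'FINISHED' is an assumed state value:

# instance form: the most recent state, computed in Python
node = session.query(DbNode).first()
print(node.state)

# expression form: filter nodes by their latest calculation state
finished = session.query(DbNode).filter(DbNode.state == 'FINISHED').all()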
from tasks.backend import engine_md
from tasks.merge.code_mapping import update_from_info_table
from ibats_utils.db import with_db_session, add_col_2_table, alter_table_2_myisam, \
    bunch_insert_on_duplicate_update
from tasks.tushare.ts_pro_api import pro

DEBUG = False
logger = logging.getLogger()
DATE_BASE = datetime.strptime('2005-01-01', STR_FORMAT_DATE).date()
ONE_DAY = timedelta(days=1)
# hour of the day after which the current day's quote data is downloaded
BASE_LINE_HOUR = 16
STR_FORMAT_DATE_TS = '%Y%m%d'

INDICATOR_PARAM_LIST_TUSHARE_STOCK_PLEDGE_STAT = [
    ('ts_code', String(20)),
    ('end_date', Date),
    ('pledge_count', Integer),
    ('unrest_pledge', DOUBLE),
    ('rest_pledge', DOUBLE),
    ('total_share', DOUBLE),
    ('pledge_ratio', DOUBLE),
]
# set up the dtype mapping
DTYPE_TUSHARE_STOCK_PLEDGE_STAT = {
    key: val
    for key, val in INDICATOR_PARAM_LIST_TUSHARE_STOCK_PLEDGE_STAT
}
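
A minimal sketch of how a dtype mapping like this is typically consumed, mirroring the insert helper that appears in a later snippet; the DataFrame contents, table name, and primary keys are hypothetical:

import pandas as pd

# hypothetical one-row DataFrame with the columns from the parameter list above
df = pd.DataFrame([{
    'ts_code': '000001.SZ', 'end_date': '2019-01-04', 'pledge_count': 12,
    'unrest_pledge': 1.0, 'rest_pledge': 2.0, 'total_share': 100.0,
    'pledge_ratio': 3.0,
}])
# upsert into a hypothetical table; the dtype mapping fixes the column types
bunch_insert_on_duplicate_update(
    df, 'tushare_stock_pledge_stat', engine_md,
    dtype=DTYPE_TUSHARE_STOCK_PLEDGE_STAT,
    primary_keys=['ts_code', 'end_date'])  # assumed primary keys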


@try_n_times(times=5,
Beispiel #21
0
 def coerce_compared_value(self, op, value):
     if op in (operators.like_op, operators.notlike_op):
         return String()
     else:
         return self
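
For context, a hedged sketch of where a hook like this usually lives: coerce_compared_value is a SQLAlchemy TypeDecorator method, and returning String() for LIKE comparisons lets a JSON-ish column still be matched as plain text. The JSONText class name is made up for illustration:

import json

from sqlalchemy import String, Text
from sqlalchemy.sql import operators
from sqlalchemy.types import TypeDecorator


class JSONText(TypeDecorator):  # hypothetical type, for illustration only
    impl = Text

    def process_bind_param(self, value, dialect):
        # serialize Python values to a JSON string on the way in
        return json.dumps(value) if value is not None else None

    def process_result_value(self, value, dialect):
        # deserialize back to Python values on the way out
        return json.loads(value) if value is not None else None

    def coerce_compared_value(self, op, value):
        # compare as plain text when used with LIKE / NOT LIKE
        if op in (operators.like_op, operators.notlike_op):
            return String()
        return self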
def upgrade():
    schema = config['schema']
    staticschema = config['schema_static']
    parentschema = config.get('parentschema')

    engine = op.get_bind().engine
    if type(engine).__name__ != 'MockConnection' and \
            op.get_context().dialect.has_table(
                engine, 'user', schema=staticschema):  # pragma: no cover
        return

    op.create_table(
        'user',
        Column('type', String(10), nullable=False),
        Column('id', Integer, primary_key=True),
        Column('username', Unicode, unique=True, nullable=False),
        Column('password', Unicode, nullable=False),
        Column('email', Unicode, nullable=False),
        Column('is_password_changed', Boolean, default=False),
        Column('role_name', String),
        schema=staticschema,
    )
    parent_column = ''
    parent_select = ''
    parent_join = ''
    if parentschema is not None and parentschema != '':  # pragma: no cover
        op.add_column(
            'user',
            Column('parent_role_name', String),
            schema=staticschema
        )
        parent_column = ', parent_role_name'
        parent_select = ', pr.name'
        parent_join = (
            'LEFT OUTER JOIN {parentschema!s}.role AS pr ON (pr.id = u.parent_role_id)'.format(
                parentschema=parentschema,
            )
        )

    try:
        op.execute(
            'INSERT INTO %(staticschema)s.user '
            '(type, username, password, email, is_password_changed, role_name%(parent_column)s) ('
            'SELECT u.type, u.username, u.password, u.email, '
            'u.is_password_changed, r.name%(parent_select)s '
            'FROM %(schema)s.user AS u '
            'LEFT OUTER JOIN %(schema)s.role AS r ON (r.id = u.role_id) %(parent_join)s'
            ')' % {
                'staticschema': staticschema,
                'schema': schema,
                'parent_select': parent_select,
                'parent_column': parent_column,
                'parent_join': parent_join,
            }
        )
        op.drop_table('user', schema=schema)
    except Exception:
        op.execute(
            "INSERT INTO %(staticschema)s.user (type, username, email, password, role) "
            "VALUES ( 'user', 'admin', '*****@*****.**', '%(pass)s', 'role_admin')" % {
                'staticschema': staticschema,
                'pass': sha1('admin'.encode('utf-8')).hexdigest()
            }
        )
Beispiel #23
0
 def load_dialect_impl(self, dialect):
     return dialect.type_descriptor(String(255))
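
Similarly, a hedged sketch of the class around load_dialect_impl, another TypeDecorator hook: returning the dialect's descriptor for String(255) pins the database-side type regardless of backend. The Str255 name is made up for illustration:

from sqlalchemy import String
from sqlalchemy.types import TypeDecorator


class Str255(TypeDecorator):  # hypothetical type, for illustration only
    impl = String

    def load_dialect_impl(self, dialect):
        # always use the dialect's plain 255-character string implementation
        return dialect.type_descriptor(String(255))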
from tasks.backend import engine_md
from tasks.merge.code_mapping import update_from_info_table
from ibats_utils.db import with_db_session, add_col_2_table, alter_table_2_myisam, \
    bunch_insert_on_duplicate_update
from tasks.tushare.ts_pro_api import pro

DEBUG = False
logger = logging.getLogger()
DATE_BASE = datetime.strptime('2005-01-01', STR_FORMAT_DATE).date()
ONE_DAY = timedelta(days=1)
# hour of the day after which the current day's quote data is downloaded
BASE_LINE_HOUR = 16
STR_FORMAT_DATE_TS = '%Y%m%d'

INDICATOR_PARAM_LIST_TUSHARE_STOCK_COMPANY = [
    ('ts_code', String(20)),
    ('chairman', String(200)),
    ('manager', String(200)),
    ('secretary', String(200)),
    ('reg_capital', DOUBLE),
    ('setup_date', Date),
    ('province', String(100)),
    ('city', String(200)),
    ('introduction', Text),
    ('website', String(100)),
    ('email', String(100)),
    ('office', String(200)),
    ('employees', Integer),
    ('main_business', Text),
    ('business_scope', Text),
]
Beispiel #25
0
class EmailVerification(Base):
    __tablename__ = 'email_verifications'

    id = Column(String(32), primary_key=True)
    _created = Column('created',
                      DateTime,
                      default=datetime.utcnow,
                      nullable=False)
    _expiry = Column('expiry', DateTime, nullable=False)
    email_ref = Column(Unicode(200),
                       ForeignKey('email_addresses.email',
                                  onupdate='RESTRICT',
                                  ondelete='CASCADE'),
                       nullable=False)

    # email defined as backref on EmailAddress

    def __init__(self, email):
        super(EmailVerification, self).__init__()
        if DBSession.query(EmailVerification).\
                filter(EmailVerification.email_ref == email).\
                filter(EmailVerification.created > (utcnow() -
                    timedelta(seconds=VERIFICATION_INTERVAL))).first():
            raise VerificationTooFast(
                'A verification was requested for that '
                'email address less than %d seconds ago' %
                VERIFICATION_INTERVAL)
        if DBSession.query(EmailVerification).\
                filter(EmailVerification.email_ref == email).\
                filter(EmailVerification.expiry > utcnow()).count() >= VERIFICATION_LIMIT:
            raise VerificationTooMany('Too many active verifications '
                                      'currently exist for this account')
        self.email_ref = email
        self.expiry = utcnow() + timedelta(seconds=VERIFICATION_TIMEOUT)
        self.id = os.urandom(self.__table__.c.id.type.length //
                             2).encode('hex')

    def __repr__(self):
        return ('<EmailVerification: id="%s">' % self.id).encode('utf-8')

    def __str__(self):
        return unicode(self).encode('utf-8')

    def __unicode__(self):
        return self.id

    def _get_created(self):
        if self._created is None:
            return None
        if self._created.tzinfo is None:
            return pytz.utc.localize(self._created)
        else:
            return self._created.astimezone(pytz.utc)

    def _set_created(self, value):
        if value.tzinfo is None:
            self._created = value
        else:
            self._created = value.astimezone(pytz.utc).replace(tzinfo=None)

    created = synonym('_created',
                      descriptor=property(_get_created, _set_created))

    def _get_expiry(self):
        if self._expiry is None:
            return None
        if self._expiry.tzinfo is None:
            return pytz.utc.localize(self._expiry)
        else:
            return self._expiry.astimezone(pytz.utc)

    def _set_expiry(self, value):
        if value.tzinfo is None:
            self._expiry = value
        else:
            self._expiry = value.astimezone(pytz.utc).replace(tzinfo=None)

    expiry = synonym('_expiry', descriptor=property(_get_expiry, _set_expiry))

    @classmethod
    def by_id(cls, id):
        """return the email verification record with id ``id``"""
        return DBSession.query(cls).filter_by(id=id).first()

    def verify(self):
        if utcnow() > self.expiry:
            raise VerificationError('Verification code has expired')
        self.email.verified = utcnow()
        DBSession.query(EmailVerification).\
            filter(EmailVerification.email_ref == self.email_ref).delete()
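
A short sketch of the verification flow above, assuming a configured DBSession and an existing EmailAddress row; the address is hypothetical:

# request a verification code (raises VerificationTooFast /
# VerificationTooMany when the rate limits above are exceeded)
verification = EmailVerification(u'someone@example.com')
DBSession.add(verification)

# later, when the user follows the emailed link carrying the id:
record = EmailVerification.by_id(verification.id)
if record is not None:
    record.verify()  # marks the address verified and purges pending codes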
Beispiel #26
0
 class A( self.Entity ):
     name = Field(String(60), unique=True)
     bs = OneToMany('B')
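
These one-line entity snippets come from Elixir-style tests, where classes inherit from a test fixture's self.Entity. A hedged, standalone sketch of the same pair of entities, assuming the classic Elixir API (metadata, setup_all, create_all, session):

from elixir import (Entity, Field, ManyToOne, OneToMany, String,
                    create_all, metadata, session, setup_all)


class A(Entity):
    name = Field(String(60), unique=True)
    bs = OneToMany('B')


class B(Entity):
    name = Field(String(60))
    a = ManyToOne('A')


metadata.bind = 'sqlite://'  # in-memory database, for the sketch only
setup_all()
create_all()

a = A(name='first')
B(name='child', a=a)
session.commit()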
Beispiel #27
0
class PasswordReset(Base):
    __tablename__ = 'password_resets'

    id = Column(String(32), primary_key=True)
    _created = Column('created',
                      DateTime,
                      default=datetime.utcnow,
                      nullable=False)
    _expiry = Column('expiry', DateTime, nullable=False)
    user_id = Column(Integer,
                     ForeignKey('users.id',
                                onupdate='RESTRICT',
                                ondelete='CASCADE'),
                     nullable=False)

    # user defined as backref on User

    def __init__(self, user):
        super(PasswordReset, self).__init__()
        if DBSession.query(PasswordReset).\
            filter(PasswordReset.user_id == user.id).\
            filter(PasswordReset.created > (utcnow() -
                    timedelta(seconds=RESET_INTERVAL))).first():
            raise ResetError('A reset was requested for that '
                             'account less than %d seconds ago' %
                             RESET_INTERVAL)
        if DBSession.query(PasswordReset).\
            filter(PasswordReset.expiry > utcnow()).\
            filter(PasswordReset.user_id == user.id).count() >= RESET_LIMIT:
            raise ResetError('Too many active resets currently '
                             'exist for this account')
        self.user_id = user.id
        self.expiry = utcnow() + timedelta(seconds=RESET_TIMEOUT)
        self.id = os.urandom(self.__table__.c.id.type.length //
                             2).encode('hex')

    def __repr__(self):
        return ('<PasswordReset: id="%s">' % self.id).encode('utf-8')

    def __str__(self):
        return unicode(self).encode('utf-8')

    def __unicode__(self):
        return self.id

    def _get_created(self):
        if self._created is None:
            return None
        if self._created.tzinfo is None:
            return pytz.utc.localize(self._created)
        else:
            return self._created.astimezone(pytz.utc)

    def _set_created(self, value):
        if value.tzinfo is None:
            self._created = value
        else:
            self._created = value.astimezone(pytz.utc).replace(tzinfo=None)

    created = synonym('_created',
                      descriptor=property(_get_created, _set_created))

    def _get_expiry(self):
        if self._expiry is None:
            return None
        if self._expiry.tzinfo is None:
            return pytz.utc.localize(self._expiry)
        else:
            return self._expiry.astimezone(pytz.utc)

    def _set_expiry(self, value):
        if value.tzinfo is None:
            self._expiry = value
        else:
            self._expiry = value.astimezone(pytz.utc).replace(tzinfo=None)

    expiry = synonym('_expiry', descriptor=property(_get_expiry, _set_expiry))

    @classmethod
    def by_id(cls, id):
        """return the password reset record with id ``id``"""
        return DBSession.query(cls).filter_by(id=id).first()

    def reset_password(self, user, new_password):
        if utcnow() > self.expiry:
            raise ResetError('Reset code has expired')
        if user is not self.user:
            raise ResetError('Invalid user for reset code %s' % self.id)
        self.user.password = new_password
        DBSession.query(PasswordReset).\
            filter(PasswordReset.user_id == self.user_id).delete()
Beispiel #28
0
 class A( self.Entity ):
     name = Field(String(60))
     bs = OneToMany('B')
Beispiel #29
0
class User(Base):
    __tablename__ = 'users'

    id = Column(Integer, primary_key=True)
    salutation = Column(Unicode(10), nullable=False)
    given_name = Column(Unicode(200), nullable=False)
    surname = Column(Unicode(200), nullable=False)
    organization = Column(Unicode(200), default='', nullable=False)
    _password = Column('password', String(200))
    _password_changed = Column('password_changed',
                               DateTime,
                               default=datetime.utcnow,
                               nullable=False)
    resets = relationship(PasswordReset,
                          backref='user',
                          cascade='all, delete-orphan',
                          passive_deletes=True)
    _created = Column('created',
                      DateTime,
                      default=datetime.utcnow,
                      nullable=False)
    timezone_name = Column('timezone',
                           Unicode(max(len(t) for t in pytz.all_timezones)),
                           default='UTC',
                           nullable=False)
    emails = relationship(EmailAddress,
                          backref='user',
                          cascade='all, delete-orphan',
                          passive_deletes=True)
    limits_id = Column(Unicode(20),
                       ForeignKey('user_limits.id',
                                  onupdate='RESTRICT',
                                  ondelete='RESTRICT'),
                       nullable=False)
    # limits defined as backref on UserLimit
    templates = relationship(LabelTemplate, backref='creator')
    # user_collections defined as backref on UserCollection
    collections = association_proxy(
        'user_collections',
        'role',
        creator=lambda k, v: UserCollection(collection=k, role=v))
    # user_groups defined as backref on Group
    groups = association_proxy('user_groups', 'id')

    def __repr__(self):
        return ('<User: name="%s">' % ' '.join(
            (self.salutation, self.given_name, self.surname))).encode('utf-8')

    def __str__(self):
        return unicode(self).encode('utf-8')

    def __unicode__(self):
        return ' '.join((self.salutation, self.given_name, self.surname))

    def _get_created(self):
        if self._created is None:
            return None
        if self._created.tzinfo is None:
            return pytz.utc.localize(self._created)
        else:
            return self._created.astimezone(pytz.utc)

    def _set_created(self, value):
        if value.tzinfo is None:
            self._created = value
        else:
            self._created = value.astimezone(pytz.utc).replace(tzinfo=None)

    created = synonym('_created',
                      descriptor=property(_get_created, _set_created))

    def _get_password_changed(self):
        if self._password_changed is None:
            return None
        if self._password_changed.tzinfo is None:
            return pytz.utc.localize(self._password_changed)
        else:
            return self._password_changed.astimezone(pytz.utc)

    def _set_password_changed(self, value):
        if value.tzinfo is None:
            self._password_changed = value
        else:
            self._password_changed = value.astimezone(
                pytz.utc).replace(tzinfo=None)

    password_changed = synonym('_password_changed',
                               descriptor=property(_get_password_changed,
                                                   _set_password_changed))

    @classmethod
    def by_id(cls, id):
        """return the user with id ``id``"""
        return DBSession.query(cls).filter_by(id=id).first()

    @classmethod
    def by_email(cls, email):
        """return the user with an email ``email``"""
        return DBSession.query(cls).join(EmailAddress).\
            filter(EmailAddress.email == email).\
            filter(EmailAddress.verified != None).first()

    def _get_timezone(self):
        """Return the timezone object corresponding to the name"""
        return pytz.timezone(self.timezone_name)

    def _set_timezone(self, value):
        """Set the timezone to the name of the timezone object"""
        self.timezone_name = value.zone

    timezone = synonym('timezone_name',
                       descriptor=property(_get_timezone, _set_timezone))

    def _set_password(self, password):
        """Store a hashed version of password"""
        self._password = PASSWORD_CONTEXT.encrypt(password)
        self.password_changed = utcnow()

    def _get_password(self):
        """Return the hashed version of the password"""
        return self._password

    password = synonym('_password',
                       descriptor=property(_get_password, _set_password))

    def authenticate(self, password):
        """Check the password against existing credentials"""
        # We call verify_and_update here in case we've defined any new
        # (hopefully stronger) algorithms in the context above. If so, this'll
        # take care of migrating users as they log in (see the sketch after
        # this class for what PASSWORD_CONTEXT might look like)
        (result, new_password) = PASSWORD_CONTEXT.verify_and_update(
            password, self._password)
        if result and new_password:
            self._password = new_password
        return result

    @property
    def full_name(self):
        return ' '.join((
            self.salutation,
            self.given_name,
            self.surname,
        ))

    @property
    def verified_emails(self):
        # XXX Do this with a query
        return [email for email in self.emails if email.verified]

    @property
    def editable_collections(self):
        # XXX Do this with a query
        return [
            collection for collection, role in self.collections.items()
            if role.id in ('editor', 'owner')
        ]

    @property
    def owned_samples(self):
        # XXX Do this with a query
        return [
            sample for collection in self.editable_collections
            for sample in collection.all_samples
        ]

    @property
    def storage_used(self):
        # XXX Do this with a query
        return sum(sample.attachments.storage_used
                   for sample in self.owned_samples)
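
A minimal sketch of what the PASSWORD_CONTEXT used by authenticate() might look like, assuming passlib, whose CryptContext provides the verify_and_update call seen above; the scheme choices are assumptions:

from passlib.context import CryptContext

# hypothetical context: new hashes use pbkdf2_sha256, while old
# sha512_crypt hashes still verify and are re-hashed on login
PASSWORD_CONTEXT = CryptContext(
    schemes=['pbkdf2_sha256', 'sha512_crypt'],
    deprecated=['sha512_crypt'],
)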
Beispiel #30
0
def import_future_min(chain_param=None,
                      wind_code_set=None,
                      begin_time=None,
                      recent_n_years=2):
    """
    更新期货合约分钟级别行情信息
    请求语句类似于:
    THS_HF('CU2105.SHF','open;high;low;close;volume;amount;change;changeRatio;sellVolume;buyVolume;openInterest',
        'Fill:Original','2021-01-18 09:15:00','2021-01-18 15:15:00')
    :param chain_param:  在celery 中將前面結果做爲參數傳給後面的任務
    :param wind_code_set:  只道 ths_code 集合
    :param begin_time:  最早的起始日期
    :param recent_n_years:  忽略n年前的合约
    :return:
    """
    # global DEBUG
    # DEBUG = True
    table_name = "ifind_future_min"
    logger.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    param_list = [
        ("open", DOUBLE),
        ("high", DOUBLE),
        ("low", DOUBLE),
        ("close", DOUBLE),
        ("volume", DOUBLE),
        ("amount", DOUBLE),
        ("change", DOUBLE),
        ("changeRatio", DOUBLE),
        ("sellVolume", DOUBLE),
        ("buyVolume", DOUBLE),
        ("openInterest", DOUBLE),
    ]
    ifind_indicator_str = ";".join([key for key, _ in param_list])

    if has_table:
        sql_str = f"""
        select ths_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
        FROM
        (
            select fi.ths_code, 
                ifnull(trade_date_max_1, addtime(ths_start_trade_date_future,'09:00:00')) date_frm, 
                addtime(ths_last_td_date_future,'15:00:00') lasttrade_date,
                case 
                    when hour(now())>=23 then DATE_FORMAT(now(),'%Y-%m-%d 23:00:00') 
                    when hour(now())>=15 then DATE_FORMAT(now(),'%Y-%m-%d 15:00:00') 
                    when hour(now())>=12 then DATE_FORMAT(now(),'%Y-%m-%d 12:00:00') 
                    else DATE_FORMAT(now(),'%Y-%m-%d 03:00:00') 
                end end_date
            from ifind_future_info fi 
            left outer join
            (
                select ths_code, addtime(max(trade_datetime),'00:00:01') trade_date_max_1 
                from {table_name} group by ths_code
            ) wfd
            on fi.ths_code = wfd.ths_code
        ) tt
        where date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date) 
        -- and subdate(curdate(), 360) < if(lasttrade_date<end_date, lasttrade_date, end_date) 
        order by date_to desc, date_frm"""
    else:
        sql_str = """
        SELECT ths_code, date_frm,
            if(lasttrade_date<end_date,lasttrade_date, end_date) date_to
        FROM
        (
            SELECT info.ths_code,
            addtime(ths_start_trade_date_future,'09:00:00') date_frm, 
            addtime(ths_last_td_date_future,'15:00:00')  lasttrade_date,
            case 
                when hour(now())>=23 then DATE_FORMAT(now(),'%Y-%m-%d 23:00:00') 
                when hour(now())>=15 then DATE_FORMAT(now(),'%Y-%m-%d 15:00:00') 
                when hour(now())>=12 then DATE_FORMAT(now(),'%Y-%m-%d 12:00:00') 
                else DATE_FORMAT(now(),'%Y-%m-%d 03:00:00') 
            end end_date
            FROM ifind_future_info info
        ) tt
        WHERE date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date)
        ORDER BY date_to desc, date_frm"""
        logger.warning('%s does not exist; computing the date range from the wind_future_info table only', table_name)

    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        # fetch date_from / date_to, using the (date_from, date_to) pair as the dict value
        future_date_dic = {
            ths_code: (str_2_datetime(date_from) if begin_time is None else
                       min([str_2_datetime(date_from), begin_time]),
                       str_2_datetime(date_to))
            for ths_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or ths_code in wind_code_set
        }

    # set up the dtype mapping
    dtype = {key: val for key, val in param_list}
    dtype['ths_code'] = String(20)
    dtype['instrument_id'] = String(20)
    dtype['trade_date'] = Date
    dtype['trade_datetime'] = DateTime

    # define a single, shared insert helper
    def insert_db(df: pd.DataFrame):
        insert_data_count = bunch_insert_on_duplicate_update(
            df,
            table_name,
            engine_md,
            dtype=dtype,
            primary_keys=['ths_code', 'trade_datetime'],
            schema=config.DB_SCHEMA_MD)
        return insert_data_count

    data_df_list = []
    future_count = len(future_date_dic)
    bulk_data_count, tot_data_count = 0, 0
    # ignore older historical contracts
    ignore_before = pd.to_datetime(date.today() -
                                   timedelta(days=int(365 * recent_n_years))
                                   ) if recent_n_years is not None else None
    try:
        logger.info("%d future instrument will be handled", future_count)
        for num, (ths_code, (date_frm,
                             date_to)) in enumerate(future_date_dic.items(),
                                                    start=1):
            # temporarily handle only RU futures contracts
            # if ths_code.find('RU') == -1:
            #     continue
            if not (0 <= (date_to - date_frm).days < 800):
                continue

            if ignore_before is not None and pd.to_datetime(
                    date_frm) < ignore_before:
                # skip contracts from more than n years ago
                continue
            if isinstance(date_frm, datetime):
                date_frm_str = date_frm.strftime(STR_FORMAT_DATETIME)
            elif isinstance(date_frm, str):
                date_frm_str = date_frm
            else:
                date_frm_str = date_frm.strftime(STR_FORMAT_DATE) + ' 09:00:00'

            # extend the end time into the early morning of the next day
            if isinstance(date_to, datetime):
                date_to_str = date_to.strftime(STR_FORMAT_DATETIME)
            elif isinstance(date_to, str):
                date_to_str = date_to
            else:
                date_to += timedelta(days=1)
                date_to_str = date_to.strftime(STR_FORMAT_DATE) + ' 03:00:00'

            logger.info('%d/%d) get %s between %s and %s', num, future_count,
                        ths_code, date_frm_str, date_to_str)
            try:
                data_df = invoker.THS_HighFrequenceSequence(
                    ths_code, ifind_indicator_str, 'Fill:Original',
                    date_frm_str, date_to_str)
            except APIError as exp:
                from tasks.ifind import ERROR_CODE_MSG_DIC, NO_BREAK_ERROR_CODE
                error_code = exp.ret_dic.setdefault('error_code', 0)
                if error_code in ERROR_CODE_MSG_DIC:
                    logger.warning("%d/%d) %s 执行异常 error_code=%d, %s", num,
                                   future_count, ths_code, error_code,
                                   ERROR_CODE_MSG_DIC[error_code])
                else:
                    logger.exception("%d/%d) %s 执行异常 error_code=%d", num,
                                     future_count, ths_code, error_code)

                if error_code in NO_BREAK_ERROR_CODE:
                    continue
                else:
                    break
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num,
                               future_count, ths_code, date_frm_str,
                               date_to_str)
                continue
            logger.info('%d/%d) %d rows of %s between %s and %s', num,
                        future_count, data_df.shape[0], ths_code, date_frm_str,
                        date_to_str)
            # data_df['ths_code'] = ths_code
            data_df.rename(columns={
                'time': 'trade_datetime',
                'thscode': 'ths_code',
            },
                           inplace=True)
            data_df['trade_date'] = pd.to_datetime(
                data_df['trade_datetime']).apply(lambda x: x.date())
            data_df.rename(columns={c: str.lower(c)
                                    for c in data_df.columns},
                           inplace=True)
            data_df['instrument_id'] = ths_code.split('.')[0]
            data_df_list.append(data_df)
            bulk_data_count += data_df.shape[0]
            # for debugging only
            if DEBUG and len(data_df_list) >= 1:
                break
            if bulk_data_count > 50000:
                logger.info('merge data with %d df %d data', len(data_df_list),
                            bulk_data_count)
                data_df = pd.concat(data_df_list)
                tot_data_count = insert_db(data_df)
                logger.info("更新 %s,累计 %d 条记录被更新", table_name, tot_data_count)
                data_df_list = []
                bulk_data_count = 0
    finally:
        data_df_count = len(data_df_list)
        if data_df_count > 0:
            logger.info('merge data with %d df %d data', len(data_df_list),
                        bulk_data_count)
            data_df = pd.concat(data_df_list)
            tot_data_count += insert_db(data_df)

        logger.info("更新 %s 结束 累计 %d 条记录被更新", table_name, tot_data_count)
Beispiel #31
0
class SpiderWebhook(Base):
    __tablename__ = 'spider_webhook'

    id = Column(String(length=50), primary_key=True)
    payload_url = Column(String(length=250))