Example #1
  def summarize_query_alignments(self):
    """For each query, summarize the number and quality of alignments
    THIS METHOD IS TOO SLOW!
    """
    return  # short-circuit: the slow implementation below is disabled

    query_maps = self.get_query_maps()
    with switch_collection(Alignment, self.alignment_collection) as A, \
         switch_collection(Map, self.map_collection) as M:

      n = query_maps.count()
      for i, q in enumerate(query_maps):
        print('working on map %i of %i %s' % (i + 1, n, q.name))
        aln_summary_doc = MapAlignmentSummary()
        query_alns = A.objects.filter(query_id = q.name).order_by('total_score_rescaled')
        aln_count = query_alns.count()
        aln_summary_doc.aln_count = aln_count

        if aln_count > 0:

          best_aln = query_alns[0]

          # Cast to Alignment Embedded
          # best_aln = deepcopy(best_aln)
          # best_aln.__class__ = AlignmentEmbedded
          aln_summary_doc.best_aln = AlignmentEmbedded(**best_aln._data)
          aln_summary_doc.best_m_score = best_aln.m_score
          aln_summary_doc.best_query_miss_rate = best_aln.query_miss_rate
          aln_summary_doc.best_ref_miss_rate = best_aln.ref_miss_rate
          aln_summary_doc.best_query_scaling_factor = best_aln.query_scaling_factor

        q.alignment_summary = aln_summary_doc
        q.save()
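The loop above runs one query and one save per map, which is why it is disabled. A minimal batched sketch, assuming the same experiment class and the aggregation helper shown in Example #12; pymongo's UpdateOne/bulk_write usage and the 'alignment_summary.aln_count' target are illustrative, not the original storage format:

  def summarize_query_alignments_bulk(self):
    """Sketch: compute per-query summaries server-side, then write them in one batch."""
    from pymongo import UpdateOne
    summaries = self.get_alignment_summary()  # aggregation pipeline, see Example #12
    with switch_collection(Map, self.map_collection) as M:
      ops = [UpdateOne({'name': d['query_id']},
                       {'$set': {'alignment_summary.aln_count': d['aln_count']}})
             for d in summaries]
      if ops:
        M._get_collection().bulk_write(ops)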
Example #2
def update_symptom_codes():
  """
  Update symptom codes from the old format to the new format.
   - Key difference: Do not use symptom code as a primary key anymore, since the codes have been deprected
  """

  from dcmetrometrics.eles import models, defs
  from mongoengine.context_managers import switch_db, switch_collection

  # Fish out symptom codes in the old format from the symptom_codes collection
  # Back them up to the symptom_codes_old collection
  with switch_collection(models.SymptomCodeOld, "symptom_codes") as SymptomCodeOld:
    old_symptoms = list(models.SymptomCodeOld.objects)
    for s in old_symptoms:
      # Make a backup of the old symptom codes
      s.switch_collection('symptom_codes_old')
      s.save() # Save to the new collection

  # Remove the symptom collection - out with the old, in with the new!
  models.SymptomCode.drop_collection() # Clears the "symptom_code" collection

  with switch_collection(models.SymptomCodeOld, "symptom_codes_old") as SymptomCodeOld:
    s_old = list(SymptomCodeOld.objects)
    for s in s_old:
      s_new = s.make_new_format()
      if not s_new.category:
        s_new.category = defs.symptomToCategory[s_new.description]
      print "saving: ", s_new
      s_new.save()
Example #3
def process_one_file(filepath, dir_path, filename, cover_mode):
    '''
    Process a regular file.
    If a record for this file already exists in the db, overwrite or skip it
    according to cover_mode; if it does not exist, add it to the db.
    '''
    global Mybucket
    dirId = getDirId(dir_path)  # id of the directory containing the file
    if isFileExists(filename, dirId):  # the file already exists in the db
        if cover_mode:
            with switch_collection(Mybucket, collection_name) as Mybucket:
                for u in Mybucket.objects(Q(na=filename)
                                          & Q(did=dirId)):  # delete the old record and object
                    object_to_delete = getObjectId(filename, dirId)  # object name
                    delete_object(str(object_to_delete))  # delete the rados object
                    u.delete()
                size = os.path.getsize(filepath)  # file size in bytes
                Mybucket(na=filename, fod=True, did=dirId,
                         si=size).save()  # add the new record
                obj_name = getObjectId(filename, dirId)
                fo = open(filepath, 'rb')
                storeToRados(str(obj_name), fo, size)  # write to rados
                fo.close()

    else:
        size = os.path.getsize(filepath)  # file size in bytes
        with switch_collection(Mybucket, collection_name) as Mybucket:
            Mybucket(na=filename, fod=True, did=dirId, si=size).save()  # add the new record
            obj_name = getObjectId(filename, dirId)
            fo = open(filepath, 'rb')
            storeToRados(str(obj_name), fo, size)  # write to rados
            fo.close()
Example #4
    def test_switch_collection_context_manager(self):
        connect('mongoenginetest')
        register_connection('testdb-1', 'mongoenginetest2')

        class Group(Document):
            name = StringField()

        Group.drop_collection()
        with switch_collection(Group, 'group1') as Group:
            Group.drop_collection()

        Group(name="hello - group").save()
        self.assertEqual(1, Group.objects.count())

        with switch_collection(Group, 'group1') as Group:

            self.assertEqual(0, Group.objects.count())

            Group(name="hello - group1").save()

            self.assertEqual(1, Group.objects.count())

            Group.drop_collection()
            self.assertEqual(0, Group.objects.count())

        self.assertEqual(1, Group.objects.count())
Example #5
    def test_switch_collection_context_manager(self):
        clear_document_registry()
        connect("mongoenginetest")
        register_connection(alias="testdb-1", db="mongoenginetest2")

        class Group(Document):
            name = StringField()

        Group.drop_collection()  # drops in default

        with switch_collection(Group, "group1") as Group:
            Group.drop_collection()  # drops in group1

        Group(name="hello - group").save()
        assert 1 == Group.objects.count()

        with switch_collection(Group, "group1") as Group:

            assert 0 == Group.objects.count()

            Group(name="hello - group1").save()

            assert 1 == Group.objects.count()

            Group.drop_collection()
            assert 0 == Group.objects.count()

        assert 1 == Group.objects.count()
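Examples #4 and #5 assert the same contract: inside the context manager, reads and writes target the named collection; outside it, the default collection is untouched. A condensed sketch of that contract, assuming a fresh local MongoDB:

    from mongoengine import Document, StringField, connect
    from mongoengine.context_managers import switch_collection

    connect('mongoenginetest')

    class Group(Document):
        name = StringField()

    Group(name='default').save()           # written to the default 'group' collection
    with switch_collection(Group, 'group1'):
        Group(name='archived').save()      # written to 'group1' instead
    assert Group.objects.count() == 1      # the default collection is unaffected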
Example #7
 def delete(self, *args, **kwargs):
   """Delete the experiment instance and related collections"""
   with switch_collection(Alignment, self.alignment_collection) as A:
     A.drop_collection()
   with switch_collection(Map, self.map_collection) as M:
     M.drop_collection()
   Document.delete(self, *args, **kwargs)
Example #8
def get_data_rumor(ID):
    with switch_collection(Stock, 'TRD_T') as StockS:
        data = StockS.objects(Stkcd=ID).all()
        date = [x['Trddt'] for x in data]
        # data_temp = [[x['Opnprc'], x['Hiprc'], x['Loprc'], x['Clsprc'], x['Dnshrtrd'], x['Dnvaltrd'],
        #                         x['Dsmvosd'], x['Dsmvtll'], x['Dretwd'], x['Dretnd'], x['Adjprcwd'], x['Adjprcnd'],
        #                         x['Markettype'], x['Trdsta']] for x in data]
        data_temp = [[
            x['Opnprc'], x['Hiprc'], x['Loprc'], x['Clsprc'], x['Dnshrtrd'],
            x['Dnvaltrd'], x['Dsmvosd'], x['Dsmvtll'], x['Dretwd'],
            x['Adjprcwd']
        ] for x in data]

        data_form = form_data(data_temp)
        """
        data_form = form_data([[x['Opnprc'], x['Hiprc'], x['Loprc'], x['Clsprc']] for x in data])
        """
        x_train = []
        y_train = []
        length = len(data_form)
        with switch_collection(Rumor, 'TRD_rumor') as RumorS:
            rumor = RumorS.objects(Stkcd=ID).all()
            rumor_dict = {}
            for item in rumor:
                rumor_dict.setdefault(item['Qdate'].replace('/', '-'), [
                    item['QLabel'], item['QPositive'], item['QNegtive'],
                    item['ALabel'], item['APositive'], item['ANegtive']
                ])
            for i in range(length):
                if i < length - 5:
                    temp_time = []
                    for j in range(5):
                        daily = copy.deepcopy(data_form[i + j])
                        if date[i + j] in rumor_dict:
                            daily.extend(rumor_dict[date[i + j]])
                        else:
                            daily.extend([0] * 6)
                        temp_time.append(daily)
                    x_train.append(temp_time)
                    # if (data_temp[i + 0][3] + data_temp[i + 1][3] + data_temp[i + 2][3] + data_temp[i + 3][3] + data_temp[i + 4][3] + data_temp[i + 5][3] + data_temp[i + 6][3] + data_temp[i + 7][3]+ data_temp[i + 8][3] + data_temp[i + 9][3] + data_temp[i + 10][3] + data_temp[i + 11][3] + data_temp[i + 12][3] + data_temp[i + 13][3] + data_temp[i + 14][3]) < \
                    #         (data_temp[i + 15][3] + data_temp[i + 16][3] + data_temp[i + 17][3] + data_temp[i + 18][3] + data_temp[i + 19][3] + data_temp[i + 20][3] + data_temp[i + 21][3] + data_temp[i + 22][3]+ data_temp[i + 23][3] + data_temp[i + 24][3] + data_temp[i + 25][3] + data_temp[i + 26][3] + data_temp[i + 27][3] + data_temp[i + 28][3] + data_temp[i + 29][3]):
                    if data_temp[i + 4][3] > data_temp[i + 5][3]:
                        y_train.append(0)
                    else:
                        y_train.append(1)
            return x_train, y_train
Example #9
def load(filename=DEFAULT_GEOZONES_FILE, drop=False):
    '''
    Load a geozones archive from <filename>

    <filename> can be either a local path or a remote URL.
    '''
    ts = datetime.now().isoformat().replace('-', '').replace(':', '').split('.')[0]
    prefix = 'geozones-{0}'.format(ts)
    if filename.startswith('http'):
        log.info('Downloading GeoZones bundle: %s', filename)
        # Use tmp.open to make sure that the directory exists in FS
        with tmp.open(GEOZONE_FILENAME, 'wb') as newfile:
            newfile.write(requests.get(filename).content)
            filename = tmp.path(GEOZONE_FILENAME)

    log.info('Extracting GeoZones bundle')
    with handle_error(prefix):
        with contextlib.closing(lzma.LZMAFile(filename)) as xz:
            with tarfile.open(fileobj=xz) as f:
                f.extractall(tmp.path(prefix))

    log.info('Loading GeoZones levels')

    log.info('Loading levels.msgpack')
    levels_filepath = tmp.path(prefix + '/levels.msgpack')
    if drop and GeoLevel.objects.count():
        name = '_'.join((GeoLevel._get_collection_name(), ts))
        target = GeoLevel._get_collection_name()
        with switch_collection(GeoLevel, name):
            with handle_error(prefix, GeoLevel):
                total = load_levels(GeoLevel, levels_filepath)
                GeoLevel.objects._collection.rename(target, dropTarget=True)
    else:
        with handle_error(prefix):
            total = load_levels(GeoLevel, levels_filepath)
    log.info('Loaded {total} levels'.format(total=total))

    log.info('Loading zones.msgpack')
    zones_filepath = tmp.path(prefix + '/zones.msgpack')
    if drop and GeoZone.objects.count():
        name = '_'.join((GeoZone._get_collection_name(), ts))
        target = GeoZone._get_collection_name()
        with switch_collection(GeoZone, name):
            with handle_error(prefix, GeoZone):
                total = load_zones(GeoZone, zones_filepath)
                GeoZone.objects._collection.rename(target, dropTarget=True)
    else:
        with handle_error(prefix):
            total = load_zones(GeoZone, zones_filepath)
    log.info('Loaded {total} zones'.format(total=total))

    cleanup(prefix)
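The drop branches above load into a timestamped temporary collection and then rename it over the live one with dropTarget=True, so readers never observe a half-loaded dataset. The pattern in isolation (a sketch; note that the live name must be captured before the switch, because _get_collection_name is patched inside the context):

    target = GeoZone._get_collection_name()      # capture the live name first
    tmp_name = '_'.join((target, ts))
    with switch_collection(GeoZone, tmp_name):
        load_zones(GeoZone, zones_filepath)      # fill the temporary collection
        # one server-side rename swaps it over the live collection
        GeoZone.objects._collection.rename(target, dropTarget=True)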
Example #10
def get_data(ID):
    with switch_collection(Stock, 'TRD_old') as StockS:
        data = StockS.objects(Stkcd=ID).all()
        data_temp = [[
            x['Opnprc'], x['Hiprc'], x['Loprc'], x['Clsprc'], x['Dnshrtrd'],
            x['Dnvaltrd'], x['Dsmvosd'], x['Dsmvtll'], x['Dretwd'],
            x['Dretnd'], x['Adjprcwd'], x['Adjprcnd'], x['Markettype'],
            x['Trdsta']
        ] for x in data]
        data_form = form_data(data_temp)
        """
        data_form = form_data([[x['Opnprc'], x['Hiprc'], x['Loprc'], x['Clsprc']] for x in data])
        """
        x_train = []
        y_train = []
        length = len(data_form)
        for i in range(length):
            if i < length - 10:
                temp_time = []
                for j in range(5):
                    temp_time.append(data_form[i + j])
                x_train.append(temp_time)
                # if data_temp[i+5][3] < data_temp[i+5][0]:
                if (data_temp[i + 0][3] + data_temp[i + 1][3] +
                        data_temp[i + 2][3] + data_temp[i + 3][3] +
                        data_temp[i + 4][3]) < (
                            data_temp[i + 5][3] + data_temp[i + 6][3] +
                            data_temp[i + 7][3] + data_temp[i + 8][3] +
                            data_temp[i + 9][3]):
                    y_train.append(0)
                else:
                    y_train.append(1)
        return x_train, y_train
Example #11
    def insert_security_code(self, market, file_name, path):
        database = DatabaseName.INDEX_KLINE_DAILY.value
        with MongoConnect(database):
            print(path + file_name + '\n')
            kline_daily_data = pd.read_csv(path + file_name, encoding='unicode_escape')
            code = file_name.split('.')[0]
            code_transfer_dict = {'999999': '000001', '999998': '000002', '999997': '000003', '999996': '000004',
                                  '999995': '000005', '999994': '000006', '999993': '000007', '999992': '000008',
                                  '999991': '000010', '999990': '000011', '999989': '000012', '999988': '000013',
                                  '999987': '000016', '999986': '000015', '000300': '000300'}
            if market == 'SH':
                if code in code_transfer_dict.keys():
                    code = code_transfer_dict[code]
                else:
                    code = '00' + code[2:]
            security_code = code + '.' + market
            kline_daily_data = kline_daily_data.reindex(columns=['date', 'open', 'high', 'low', 'close', 'volumw',
                                                                 'turover', 'match_items', 'interest'])
            kline_daily_data.rename(columns={'volumw': 'volume', 'turover': 'amount'},  inplace=True)

            with switch_collection(Kline, security_code) as KlineDaily_security_code:
                doc_list = []
                for index, row in kline_daily_data.iterrows():
                    if not np.isnan(row['date']):  # check before int(), which raises on NaN
                        date_int = str(int(row['date']))
                        time_tag = datetime.strptime(date_int, "%Y%m%d")
                        doc = KlineDaily_security_code(time_tag=time_tag, pre_close=None,
                                                       open=int(row['open']), high=int(row['high']),
                                                       low=int(row['low']), close=int(row['close']),
                                                       volume=int(row['volume']), amount=int(row['amount']),
                                                       match_items=int(row['match_items']), interest=int(row['interest']))
                        doc_list.append(doc)

                KlineDaily_security_code.objects.insert(doc_list)
Example #12
  def get_alignment_summary(self):
    with switch_collection(Alignment, self.alignment_collection) as A:

      A.ensure_indexes()

      res = A.objects.aggregate(
          { '$sort': { 'query_id': 1, 'total_score_rescaled': 1} },
          { '$group': {
              '_id': '$query_id',
              'aln_count': { '$sum': 1},
              'best_score': {'$first': '$total_score_rescaled'},
              'best_m_score': {'$first': "$m_score"},
              'best_query_miss_rate': {'$first': '$query_miss_rate'},
              'best_ref_miss_rate': {'$first': '$ref_miss_rate'},
              'best_query_scaling_factor' : {'$first': '$query_scaling_factor'}
            }
          },
        allowDiskUse=True)

      def fix_d(d):
        # Remap _id to query_id
        d['query_id'] = d['_id']
        del d['_id']
        return d

      return [fix_d(d) for d in res]
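Note on the aggregate call above: passing each pipeline stage as a separate positional argument is the older MongoEngine signature; newer releases (assumption: roughly 0.24 or later) expect the whole pipeline as one list. The same call under the newer API would look like:

      pipeline = [
          {'$sort': {'query_id': 1, 'total_score_rescaled': 1}},
          {'$group': {
              '_id': '$query_id',
              'aln_count': {'$sum': 1},
              'best_score': {'$first': '$total_score_rescaled'},
          }},
      ]
      res = A.objects.aggregate(pipeline, allowDiskUse=True)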
Example #13
def density_near_query(output_path, dataset: DatasetEnum, ids_file):
    with open(ids_file, 'r') as f:
        ids = json.load(f)

    ds_ids = ids[dataset.value]

    thresholds = {
        "density_10m": 0.01,
        "density_100m": 0.1,
        "density_500m": 0.5,
        "density_1km": 1,
    }
    # Build the record type once instead of on every loop iteration.
    DensityRecord = make_dataclass("DensityRecord", [("photo_id", int),
                                                     ("density_10m", int),
                                                     ("density_100m", int),
                                                     ("density_500m", int),
                                                     ("density_1km", int)])

    density_records = []
    for photo_id in tqdm(ds_ids):
        q_photo = FlickrPhoto.objects(photo_id=photo_id).first()
        coords = q_photo.geo.coordinates
        densities = dict()
        with switch_collection(FlickrPhoto, "flickr.db1") as DbFlickrPhoto:
            for density_type, radius in thresholds.items():
                density = DbFlickrPhoto.count_photos_in_radius(coords, radius)
                densities[density_type] = density
        density_records.append(DensityRecord(photo_id, **densities))

    df = pd.DataFrame(density_records)
    df.to_csv(output_path, index=False)
Example #14
    def get_histories(self, cname, **kwargs):
        cn = self.get_collection_name(cname)

        with switch_collection(History, cn) as _history:
            cursor = _history.objects.filter(**kwargs)
            result = cursor.order_by('-time')
        return result
Example #15
    def update_index_data(self, end=None):
        """
        Pull daily index K-lines up to `end` and save each field to hdf5.

        :param end: cutoff datetime; defaults to now
        :return:
        """
        get_collection_list = GetCollectionList()
        index_list = get_collection_list.get_index_list()
        # A datetime.now() default argument would be evaluated only once, at definition time.
        self.end = end if end is not None else datetime.now()
        database = DatabaseName.INDEX_KLINE_DAILY.value
        with MongoConnect(database):
            index_data_dict = {}
            for index_code in index_list:
                with switch_collection(Kline, index_code) as KlineDaily_index_code:
                    security_code_data = KlineDaily_index_code.objects(time_tag__lte=self.end).as_pymongo()
                    security_code_data_df = pd.DataFrame(list(security_code_data)).reindex(columns=self.field)
                    security_code_data_df.set_index(["time_tag"], inplace=True)
                    index_data_dict[index_code] = security_code_data_df
        field_data_dict = {}
        for i in self.field:
            if i != 'time_tag':
                field_data_pd = pd.DataFrame({key: value[i] for key, value in index_data_dict.items()})
                # divide the raw open/high/low/close values by 10000
                if i in ['open', 'high', 'low', 'close']:
                    field_data_dict[i] = field_data_pd.div(10000)
                else:
                    field_data_dict[i] = field_data_pd
        folder_name = LocalDataFolderName.MARKET_DATA.value
        sub_folder_name = LocalDataFolderName.KLINE_DAILY.value
        sub_sub_folder_name = LocalDataFolderName.INDEX.value
        for field in self.field:
            if field not in ['time_tag', 'interest']:
                path = LocalDataPath.path + folder_name + '/' + sub_folder_name + '/' + sub_sub_folder_name + '/'
                data_name = field
                save_data_to_hdf5(path, data_name, pd.DataFrame(field_data_dict[field]))
Example #16
  def get_summary(self):
    """Return a summary of this experiment
    """
    ret = {}
    ret['name'] = self.name
    ret['description'] = self.description
    ret['created'] = self.created
    ret['num_query_maps'] = len(self.get_query_map_ids())
    ret['num_ref_maps'] = len(self.get_ref_map_ids())

    with switch_collection(Alignment, self.alignment_collection) as A:

      A.ensure_indexes()
      ret['num_alignments'] = A.objects.count()

      # # Get the number of alignments per query
      # alignments = A.objects.only('query_id')
      # query_id_counts = Counter(a.query_id for a in alignments)
      # aligned_queries = [{"query_id" : query_id, 
      #                     "aln_count": aln_count} for query_id, aln_count in query_id_counts.iteritems()]
      # ret['aligned_queries'] = aligned_queries

    ret['aligned_queries'] = self.get_alignment_summary()

    # Get a list of query_ids with the number of alignments for each query.
    # Mongoengine does not have good aggregation support so do it here in memory.
    return ret
Example #17
    def save_tick_data(self,
                       ticks: List[TickData],
                       collection_name: str = None) -> bool:
        """"""
        for tick in ticks:
            tick.datetime = convert_tz(tick.datetime)

            d = tick.__dict__
            d["exchange"] = d["exchange"].value
            d.pop("gateway_name")
            d.pop("vt_symbol")
            param = to_update_param(d)
            if not collection_name:
                DbTickData.objects(
                    symbol=d["symbol"],
                    exchange=d["exchange"],
                    datetime=d["datetime"],
                ).update_one(upsert=True, **param)
            else:
                with switch_collection(DbTickData, collection_name):
                    DbTickData.objects(
                        symbol=d["symbol"],
                        exchange=d["exchange"],
                        datetime=d["datetime"],
                    ).update_one(upsert=True, **param)
Example #18
    def post_data(self,
                  data: typing.Union[typing.MutableMapping[str, str],
                                     typing.List[typing.MutableMapping[str,
                                                                       str]]]):
        def create_document(row: typing.MutableMapping[str, str]):
            kwargs = {key: _type_convert(val) for key, val in row.items()}

            # Can't store field 'id' in document - rename it
            if 'id' in kwargs:
                kwargs[self.id_field_alias] = kwargs.pop('id')

            return kwargs

        # Put data in collection belonging to this data source
        with context_managers.switch_collection(CsvRow,
                                                self.location) as collection:
            collection = collection._get_collection()

            try:
                # Data is a dictionary - a single row
                collection.insert_one(create_document(data))

            except AttributeError:
                # Data is a list of dictionaries - multiple rows
                documents = (create_document(row) for row in data)
                collection.insert_many(documents)
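A hedged usage sketch for post_data: the try/except dispatch relies on a dict having .items() for a single row, while a list raises AttributeError, so both shapes share one entry point ('source' is a hypothetical instance of the surrounding class):

        source.post_data({'id': '1', 'name': 'alpha'})    # one row -> insert_one
        source.post_data([{'id': '2'}, {'id': '3'}])      # many rows -> insert_many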
Example #19
 def add_dialog_one(self, uurl, ddialog, col_name):
     save_url = uurl
     save_dialog = ddialog
     dialog_obj = Dialog(url=save_url, dialog=save_dialog)
     # select the target collection here
     with switch_collection(Dialog, col_name):
         return dialog_obj.save()
Example #20
    def get_response(self,
                     params: typing.Optional[typing.Mapping[str, str]] = None):
        # TODO accept parameters provided twice as an inclusive OR
        if params is None:
            params = {}
        params = {key: _type_convert(val) for key, val in params.items()}

        with context_managers.switch_collection(CsvRow,
                                                self.location) as collection:
            records = collection.objects.filter(**params).exclude('_id')

            data = list(records.as_pymongo())

            # Couldn't store field 'id' in document - recover it
            for item in data:
                try:
                    item['id'] = item.pop(self.id_field_alias)

                except KeyError:
                    pass

            return JsonResponse({
                'status': 'success',
                'data': data,
            })
Example #21
    def switch_collection(self, collection_name, keep_created=True):
        """
        Temporarily switch the collection for a document instance.

        Only really useful for archiving off data and calling `save()`::

            user = User.objects.get(id=user_id)
            user.switch_collection('old-users')
            user.save()

        :param str collection_name: The collection name to use for saving
            the document

        :param bool keep_created: keep the self._created value after switching
            collections; otherwise it is reset to True


        .. seealso::
            Use :class:`~mongoengine.context_managers.switch_db`
            if you need to read from another database
        """
        with switch_collection(self.__class__, collection_name) as cls:
            collection = cls._get_collection()
        self._get_collection = lambda: collection
        self._collection = collection
        self._created = True if not keep_created else self._created
        self.__objects = self._qs
        self.__objects._collection_obj = collection
        return self
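A hedged usage sketch for the method above (User and user_id come from the docstring; the keep_created flag controls whether the subsequent save inserts or updates):

        user = User.objects.get(id=user_id)
        user.switch_collection('old-users', keep_created=False)
        user.save()  # _created was reset, so this inserts into 'old-users'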
Example #22
def log_cleanplus():
    # Log cleanplus DB entry
    with switch_collection(Cleanplus, __colcleanplus) as CCleanplus:
        num_saved, num_unsaved = 0, 0
        unsaved = []

        for c in cleanplus:
            data = CCleanplus(
                service = c['service'],
                year = c['year'],
                department = c['department'],
                team = c['team'],
                start_date = c['start_date'],
                end_date = c['end_date'],
                budget_summary = c['budget_summary'],
                budget_assigned = c['budget_assigned'],
                budget_current = c['budget_assigned'],
                budget_contract = c['budget_contract'],
                budget_spent = c['budget_spent']
            )

            try:
                data.save()
                num_saved += 1
            except Exception:
                unsaved.append(c)
                num_unsaved += 1

        with open('error/unsaved_cleanplus.json', 'w') as f:
            json.dump(unsaved, f)
        print "CLEANPLUS: Logged", num_saved, "items,", num_unsaved, "unsaved items, total:", num_saved+num_unsaved
Example #23
 def load_tick_data(
     self,
     symbol: str,
     exchange: Exchange,
     start: datetime,
     end: datetime,
     collection_name: str = None,
 ) -> Sequence[TickData]:
     if collection_name is None:
         s = DbTickData.objects(
             symbol=symbol,
             exchange=exchange.value,
             datetime__gte=start,
             datetime__lte=end,
         )
     else:
         with switch_collection(DbTickData, collection_name):
             s = DbTickData.objects(
                 symbol=symbol,
                 exchange=exchange.value,
                 datetime__gte=start,
                 datetime__lte=end,
             )
     data = [db_tick.to_tick() for db_tick in s]
     return data
Example #24
 def load_bar_data(
     self,
     symbol: str,
     exchange: Exchange,
     interval: Interval,
     start: datetime,
     end: datetime,
     collection_name: str = None,
 ) -> Sequence[BarData]:
     if collection_name is None:
         s = DbBarData.objects(
             symbol=symbol,
             exchange=exchange.value,
             interval=interval.value,
             datetime__gte=start,
             datetime__lte=end,
         )
     else:
         with switch_collection(DbBarData, collection_name):
             s = DbBarData.objects(
                 symbol=symbol,
                 exchange=exchange.value,
                 interval=interval.value,
                 datetime__gte=start,
                 datetime__lte=end,
             )
     data = [db_bar.to_bar() for db_bar in s]
     return data
Example #25
    def load_tick_data(
        self,
        symbol: str,
        exchange: Exchange,
        start: datetime,
        end: datetime,
        collection_name: str = None,
    ) -> List[TickData]:
        """"""
        if not collection_name:
            s: QuerySet = DbTickData.objects(
                symbol=symbol,
                exchange=exchange.value,
                datetime__gte=convert_tz(start),
                datetime__lte=convert_tz(end),
            )
        else:
            with switch_collection(DbTickData, collection_name):
                s: QuerySet = DbTickData.objects(
                    symbol=symbol,
                    exchange=exchange.value,
                    datetime__gte=convert_tz(start),
                    datetime__lte=convert_tz(end),
                )
        vt_symbol = f"{symbol}.{exchange.value}"
        ticks: List[TickData] = []
        for db_tick in s:
            db_tick.datetime = DB_TZ.localize(db_tick.datetime)
            db_tick.exchange = Exchange(db_tick.exchange)
            db_tick.gateway_name = "DB"
            db_tick.vt_symbol = vt_symbol
            ticks.append(db_tick)

        return ticks
Example #26
def load_user(username):
    with switch_collection(User, 'users'):
        # .get() raises DoesNotExist rather than returning None
        try:
            return User.objects.get(username__exact=username)
        except User.DoesNotExist:
            return None
Example #28
def process_one_path(path, cover_mode, system_mode):
    '''
    Process the given directory: add metadata for the directory itself and for
    all files and subdirectories under it to the db.
    '''
    global recursive_flag
    global Mybucket
    recursive_flag = recursive_flag + 1  # recursion depth + 1

    if recursive_flag == 1:
        if not isDirExists(path):  # the directory is not in the database yet
            with switch_collection(Mybucket, collection_name) as Mybucket:
                Mybucket(na=path, fod=False).save()  # create the record and add it to the db

    files = os.listdir(path)  # list all subdirectories and files under the directory

    for file in files:
        if file in break_names:  # skip files on the break list
            continue

        if path == '/':  # the linux root directory '/'
            current_path = path + file
        else:
            current_path = path + '/' + file

        if isSysOrHide(system_mode, file, current_path):  # skip system or hidden files
            continue

        if os.path.isdir(current_path):  # a directory
            if not isDirExists(current_path):  # the directory is not in the db yet
                if isDirExists(path):  # check that its parent directory exists
                    parentId = getDirId(path)  # id of the parent directory
                    with switch_collection(Mybucket,
                                           collection_name) as Mybucket:
                        Mybucket(na=current_path, fod=False,
                                 did=parentId).save()  # create the record and add it to the db
                else:
                    print("Error: no parent path")
                    sys.exit()
            process_one_path(current_path, cover_mode,
                             system_mode)  # recurse into the subdirectory

        elif os.path.isfile(current_path):  # a regular file
            process_one_file(current_path, path, file, cover_mode)  # process the file
        else:
            print("Warning: ", current_path, " is not a file or path")

    recursive_flag = recursive_flag - 1  # this recursion level is done, depth - 1
Example #29
def log_budgetspider():
    # Clear previous unmatched record
    with open("error/unmatched.tsv", 'w') as f:
        pass

    # Small opengov objects
    sopengov = []
    for i in opengov:
        sopengov.append(i['name'])

    # Binary search
    def bsearch(a, x, lo=0, hi=None):
        hi = hi or len(a)
        pos = bisect_left(a, x, lo, hi)
        return (pos if pos != hi and a[pos] == x else -1)

    # Log budgetspider DB entry
    with switch_collection(Budgetspider, __colbudgetspider) as CBudgetspider:
        num_unmatched, num_saved, num_unsaved, num_past = 0, 0, 0, 0
        unmatched, unsaved = [], []
        for c in cleanplus:
            if int(c['year']) < 2010:
                num_past += 1
                continue
            search_idx = bsearch(sopengov, c['service'])
            if search_idx != -1 and c['service'] == opengov[search_idx]['name']:
                data = CBudgetspider(
                    service = c['service'],
                    year = c['year'],
                    start_date = c['start_date'],
                    end_date = c['end_date'],
                    department = c['department'],
                    team = c['team'],
                    category_one = opengov[search_idx]['level_one'],
                    category_two = opengov[search_idx]['level_two'],
                    category_three = opengov[search_idx]['level_three'],
                    budget_summary = c['budget_summary'],
                    budget_assigned = c['budget_assigned'],
                    budget_current = c['budget_current'],
                    budget_contract = c['budget_contract'],
                    budget_spent = c['budget_spent']
                )
                try:
                    data.save()
                    num_saved += 1
                except Exception:
                    num_unsaved += 1
                    unsaved.append((search_idx, c, opengov[search_idx]))
            else:
                with open("error/unmatched.tsv", 'a') as f:
                    err = "\t".join((c['service'], c['year'], c['department'], c['team'], c['budget_summary'])).encode('utf-8')
                    f.write(err + '\n')
                    unmatched.append(c)
                    num_unmatched += 1
        with open("error/unsaved_budgetspider.json", 'w') as f:
            json.dump(unsaved, f)
        with open("error/unmatched_budgetspider.json", 'w') as f:
            json.dump(unmatched, f)
        print "BUDGETSPIDER: Logged", num_saved, "items,", num_unsaved, "unsaved items,", num_unmatched, "unmatched items,", num_past, "2008-09 data, total:", num_saved+num_unsaved+num_unmatched+num_past
Example #30
def last_entry(coll, site):
    with switch_collection(Article, coll) as article:
        data = article.objects(site=site).limit(1).order_by("-date")

    if data:
        date = data[0].date
    else:
        date = dt.datetime.utcnow() - dt.timedelta(days=7)
    return date
Example #31
    def switch_collection(self, cls):
        """
        Switches to the chosen collection using Mongoengine's switch_collection.
        """

        if self.collection:
            with switch_collection(cls, self.collection) as new_cls:
                yield new_cls
        else:
            yield cls
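The yield implies this helper is wrapped with contextlib.contextmanager, giving callers one uniform with-block whether or not a collection override is configured ('store' and Article are assumed names):

        with store.switch_collection(Article) as A:
            total = A.objects.count()  # counts in store.collection, or in the default collection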
Example #32
 def _get_data_with_process_pool(self, database, security_list, process_manager_dict, security_list_i):
     with MongoConnect(database):
         thread_data_dict = {}
         for stock in security_list:
             with switch_collection(Kline, stock) as KlineDaily_security_code:
                 security_code_data = KlineDaily_security_code.objects(time_tag__lte=self.end).as_pymongo()
                 security_code_data_df = pd.DataFrame(list(security_code_data)).reindex(columns=self.field)
                 security_code_data_df.set_index(["time_tag"], inplace=True)
                 thread_data_dict[stock] = security_code_data_df.reindex(self.calendar_SZ).fillna(method='ffill')
         process_manager_dict[security_list_i] = thread_data_dict
Example #33
def save_articles(coll, articles):
    if len(articles) < 1:
        return

    with switch_collection(Article, coll):
        for a in articles:
            try:
                a.save()
            except NotUniqueError:
                LOGGER[coll].error("Duplicate article: %s." % a.title)
Example #34
def get_data_emotion(ID):
    with switch_collection(Stock, 'TRD_old') as StockS:
        data = StockS.objects(Stkcd=ID).all()
        date = [x['Trddt'] for x in data]
        data_temp = [[
            x['Opnprc'], x['Hiprc'], x['Loprc'], x['Clsprc'], x['Dnshrtrd'],
            x['Dnvaltrd'], x['Dsmvosd'], x['Dsmvtll'], x['Dretwd'],
            x['Dretnd'], x['Adjprcwd'], x['Adjprcnd'], x['Markettype'],
            x['Trdsta']
        ] for x in data]
        data_form = form_data(data_temp)
        """
        data_form = form_data([[x['Opnprc'], x['Hiprc'], x['Loprc'], x['Clsprc']] for x in data])
        """
        x_train = []
        y_train = []
        length = len(data_form)
        with switch_collection(Rumor, 'TRD_rumor') as RumorS:
            rumor = RumorS.objects(Stkcd=ID).all()
            rumor_dict = {}
            for item in rumor:
                rumor_dict.setdefault(item['Qdate'].replace('/', '-'),
                                      item['QLabel'])
            for i in range(length):
                if i < length - 5:
                    temp_time = []
                    for j in range(5):
                        daily = copy.deepcopy(data_form[i + j])
                        if date[i + j] in rumor_dict:
                            if rumor_dict[date[i + j]]:
                                daily.append(1)
                            else:
                                daily.append(-1)
                        else:
                            daily.append(0)
                        temp_time.append(daily)
                    x_train.append(temp_time)
                    if data_temp[i + 5][3] < data_temp[i + 5][0]:
                        y_train.append(0)
                    else:
                        y_train.append(1)
            return x_train, y_train
Example #36
def isDirExists(dir_path):
    '''
    Check whether metadata for dir_path already exists in the database.
    Returns True if it exists, False otherwise.
    '''
    global Mybucket
    with switch_collection(Mybucket, collection_name) as Mybucket:
        return Mybucket.objects(na=dir_path).count() > 0  # query by directory path
Example #37
def returnSites():
    username = session['user_id']
    with switch_collection(User, 'users') as toGet:
        userObj = User.objects.get(username__exact=username)
        return jsonify(
            list(
                map(
                    lambda site: {
                        "content": site.content.hex(),
                        "id": site.id
                    }, userObj.sites)))
Example #38
def addsites(id):
    with switch_collection(User, 'users') as toAdd:
        user = User.objects.get(username__exact=session['user_id'])
        info = SiteInfo(id=id, content=request.get_data())
        updated = User.objects(
            id=user.id,
            sites__id=id).update(set__sites__S__content=info.content)
        if not updated:
            User.objects(id=user.id).update_one(push__sites=info)

        user.save(validate=True)
        return jsonify({"success": "updated" if updated else "new"})
Example #39
 def add_problem_one(self, data, col_name):
     doctor_name, hospital, date, url = data.split('#')[:4]
     problem_obj = Problem(doctor=doctor_name,
                           hospital=hospital,
                           date=date,
                           url=url)
     # select the target collection here
     with switch_collection(Problem, col_name):
         return problem_obj.save()
Example #40
    def insert_security_code(self, market, file_name, path):
        with MongoConnect(self.database):
            print(path + file_name + '\n')
            kline_daily_data = pd.read_csv(path + file_name, encoding='unicode_escape')
            security_code = file_name.split('.')[0] + '.' + market
            if is_security_type(security_code, 'EXTRA_STOCK_A'):
                kline_daily_data = kline_daily_data.reindex(columns=['date', 'open', 'high', 'low', 'close', 'volumw',
                                                                     'turover', 'match_items', 'interest'])
                kline_daily_data.rename(columns={'volumw': 'volume', 'turover': 'amount'},  inplace=True)
                kline_daily_data = kline_daily_data[kline_daily_data.date >= 20020104]
                with switch_collection(Kline, security_code) as KlineDaily_security_code:
                    doc_list = []
                    security_code_data = pd.DataFrame()
                    if security_code in self.data_dict.keys():
                        security_code_data = self.data_dict[security_code].set_index(["TRADE_DT"])
                        security_code_data = security_code_data.fillna(0)
                    for index, row in kline_daily_data.iterrows():
                        if not np.isnan(row['date']):  # check before int(), which raises on NaN
                            date_int = int(row['date'])
                            try:
                                pre_close = int(10000 * security_code_data.loc[date_int, 'S_DQ_PRECLOSE'])
                            except KeyError:
                                pre_close = None
                            time_tag = datetime.strptime(str(date_int), "%Y%m%d")
                            doc = KlineDaily_security_code(time_tag=time_tag, pre_close=pre_close,
                                                           open=int(row['open']), high=int(row['high']),
                                                           low=int(row['low']), close=int(row['close']),
                                                           volume=int(row['volume']), amount=int(row['amount']),
                                                           match_items=int(row['match_items']), interest=int(row['interest']))
                            doc_list.append(doc)

                    # Backfill daily bars before 20020104 from the full csv table; match_items is 0
                    security_code_data = security_code_data[security_code_data.index < 20020104]
                    for index, row in security_code_data.iterrows():
                        if row['S_DQ_AMOUNT'] > 0:
                            date_int = int(index)
                            date_int = str(date_int)
                            time_tag = datetime.strptime(date_int, "%Y%m%d")
                            try:
                                pre_close = int(row['S_DQ_PRECLOSE'] * 10000)
                            except KeyError:
                                pre_close = None
                            doc = KlineDaily_security_code(time_tag=time_tag, pre_close=pre_close,
                                                           open=int(row['S_DQ_OPEN'] * 10000),
                                                           high=int(row['S_DQ_HIGH'] * 10000),
                                                           low=int(row['S_DQ_LOW'] * 10000),
                                                           close=int(row['S_DQ_CLOSE'] * 10000),
                                                           volume=int(row['S_DQ_VOLUME'] * 100),
                                                           amount=int(row['S_DQ_AMOUNT'] * 1000),
                                                           match_items=0, interest=0)
                            doc_list.append(doc)
                    KlineDaily_security_code.objects.insert(doc_list)
Example #41
def isFileExists(filename, dirId):
    '''
    Check whether metadata for the file already exists in the database.
    Returns True if it exists, False otherwise.
    '''
    global Mybucket
    with switch_collection(Mybucket, collection_name) as Mybucket:
        # query by file name and parent directory
        return Mybucket.objects(Q(na=filename) & Q(did=dirId)
                                & Q(sds=False)).count() > 0
Example #42
def find_service(_service_name):
    print("budgetspider")
    with switch_collection(Budgetspider, __colbudgetspider) as CBudgetspider:
        for i in CBudgetspider.objects.all():
            if utf8(re.sub("[~*()'\". -]", "", i["service"])) == utf8(re.sub("[~*()'\". -]", "", _service_name)):
                for pr in map(utf8, (i["service"], i["year"], i["department"], i["category_one"], i["category_two"])):
                    print(pr, end=' ')
    print()
    print("opengov")
    with switch_collection(Opengov, __colopengov) as COpengov:
        for i in COpengov.objects():
            if utf8(re.sub("[~*()'\". -]", "", i["name"])) == utf8(re.sub("[~*()'\". -]", "", _service_name)):
                for pr in map(utf8, (i["name"], i["level_one"], i["level_two"])):
                    print(pr, end=' ')
    print()
    print("cleanplus")
    with switch_collection(Cleanplus, __colcleanplus) as CCleanplus:
        for i in CCleanplus.objects(service=_service_name):
            if utf8(re.sub("[~*()'\". -]", "", i["service"])) == utf8(re.sub("[~*()'\". -]", "", _service_name)):
                for pr in map(utf8, (i["service"], i["year"], i["department"])):
                    print(pr, end=' ')
Example #43
def insert():
    error = None
    # Generate password hash, 12 rounds
    pwHash = bcrypt.generate_password_hash(request.form['password'])
    newUser = User(username=request.form['username'],
                   email=request.form['email'],
                   password=pwHash)
    with switch_collection(User, 'users') as toGet:
        try:
            if User.objects.get(username__exact=str(request.form['username'])):
                raise BadRequest('Registration Error, please try again.')

        except DoesNotExist:
            with switch_collection(User, 'users') as toAdd:
                newUser.secretKey = pyotp.random_base32()
                newUser.save(validate=True)
                totp = pyotp.TOTP(newUser.secretKey)
                uri = totp.provisioning_uri(request.form['email'],
                                            issuer_name='Kepyer.pro')
                session['verify'] = newUser.username
                # totp uri, can be used to generate QR code
                return uri
Example #44
def update_unit_statuses():
  """
  Update unit statuses to reference the new symptom collection
  """

  from dcmetrometrics.eles import models
  from mongoengine.context_managers import switch_db, switch_collection

  d2r = dict() # Symptom description to record
  for s in models.SymptomCode.objects:
    d2r[s.description] = s


  # Fish out UnitStatus in the old format from the symptom_code collection
  # Backup to the symptom_code_old collection
  print """Exporting from collection escalator_statuses, assuming records are in the old format.
If successful, will backup to collection escalator_statuses_old..."""
  try:
    with switch_collection(models.UnitStatusOld, "escalator_statuses") as UnitStatusOld:
      n = models.UnitStatusOld.objects.count()
      for i, s in enumerate(models.UnitStatusOld.objects):
        # Make a backup of the old unit statuses
        print "Backing up unit status %i of %i (%.2f %%)"%(i, n, float(i)/n*100.0)
        s.switch_collection('escalator_statuses_old')
        s.save() # Save to the new collection
  except Exception as e:
    print "Caught Exception!\n"
    print str(e)
    return

  # Save unit statuses in the new format.
  n = models.UnitStatusOld.objects.count()
  for i, s_old in enumerate(models.UnitStatusOld.objects):
    print('Reformatting unit status %i of %i (%.2f %%)' % (i, n, float(i) / n * 100.0))
    s_new = s_old.to_new_format()
    s_new.pk = s_old.pk
    s_new.symptom = d2r[s_old.symptom.description]
    s_new.save()
Example #45
def log_opengov():
    # Log opengov DB entry
    with switch_collection(Opengov, __colopengov) as COpengov:
        num_saved, num_unsaved = 0, 0
        unsaved = []

        for o in opengov:
            data = COpengov(
                service = o['name'],
                category_one = o['level_one'],
                category_two = o['level_two'],
                category_three = o['level_three']
            )

            try:
                data.save()
                num_saved += 1
            except Exception:
                unsaved.append(o)
                num_unsaved += 1

        with open('error/unsaved_opengov.json', 'w') as f:
            json.dump(unsaved, f)
        print "OPENGOV: Logged", num_saved, "items,", num_unsaved, "unsaved items, total:", num_saved+num_unsaved
Example #46
    def switch_collection(self, collection_name):
        """
        Temporarily switch the collection for a document instance.

        Only really useful for archiving off data and calling `save()`::

            user = User.objects.get(id=user_id)
            user.switch_collection('old-users')
            user.save()

        If you need to read from another database see
        :class:`~mongoengine.context_managers.switch_db`

        :param collection_name: The collection name to use for saving the
            document
        """
        with switch_collection(self.__class__, collection_name) as cls:
            collection = cls._get_collection()
        self._get_collection = lambda: collection
        self._collection = collection
        self._created = True
        self.__objects = self._qs
        self.__objects._collection_obj = collection
        return self
Example #47
 def get_query_map_ids(self):
   """Get a list of query ids"""
   # Select the distinct names of maps with type 'query'
   with switch_collection(Map, self.map_collection) as M:
     M.ensure_indexes()
     return M.objects.filter(type = 'query').distinct('name')
Example #48
 def get_ref_maps(self):
   """Get a list of ref ids"""
   # Select a list of distinct query_id's from the alignments
   with switch_collection(Map, self.map_collection) as M:
     M.ensure_indexes()
     return M.objects.filter(type = 'reference')
Example #49
def read_data_from_database_for___uid_or_uname_list():
    uid_or_uname_list = []
    
    this_uid_list = []
    this_nickname_list = []
    
    weibo_collection_name = []
#     weibo_collection_name = ["zhuanjiyin_nohashtag_original_2014_03_01_to_2014_03_10_detmine_1", \
#                        "zhuanjiyin_nohashtag_original_2014_03_10_to_2014_03_20_detmine_2", \
#                        "zhuanjiyin_nohashtag_original_2014_03_20_to_2014_04_01_detmine_3"]

    # collect user info from weibo posts
    print("start single weibo")
    global Single_weibo_with_more_info_store
    for one_collection in weibo_collection_name:
        with switch_collection(Single_weibo_with_more_info_store, one_collection) as Single_weibo_with_more_info_store:
            for one_weibo in Single_weibo_with_more_info_store.objects:
                this_uid_list.append(one_weibo["uid"])
                this_uid_list.append(one_weibo["come_from_user_id"])
                this_nickname_list.extend(chuli_at_info(one_weibo["at_info"]))
                this_nickname_list.extend(chuli_at_info(one_weibo["retweet_reason_at_info"]))
    
    # collect user info from comments
    # 'zhuanjiyin_nohashtag_original_single_comment_2016_with_more_info'
    print("start comment")
    comment_collections = []
#     comment_collections.append('zhuanjiyin_nohashtag_original_single_comment_2014_with_more_info_repair')
    
    global Single_comment_store
    for one_collection in comment_collections:
        with switch_collection(Single_comment_store, one_collection) as Single_comment_store:
            for one_comment in Single_comment_store.objects:
                this_uid_list.append(one_comment["uid"])
                this_nickname_list.extend(chuli_at_info(one_comment["at_info"]))
                
    print "start repost"
    repost_collections = []
    repost_collections.append("zhuanjiyin_nohashtag_original_single_repost_2016_with_more_info_repair")

    global Single_repost_store
    for one_collection in repost_collections:
        with switch_collection(Single_repost_store, one_collection) as Single_repost_store:
            for one_comment in Single_repost_store.objects:
                this_uid_list.append(one_comment["uid"])
                this_nickname_list.extend(chuli_at_info(one_comment["at_info"]))
                
                
    uid_or_uname_list.extend(list(set(this_uid_list)))
    uid_or_uname_list.extend(list(set(this_nickname_list)))
    uid_or_uname_list = list(set(uid_or_uname_list))
#     print "start filter"
#     for uid_or_nickname in set(this_uid_list):    
#         if len(UserInfo_store.objects(Q(uid_or_uname=str(uid_or_nickname)) | Q(nickname=str(uid_or_nickname)))) == 0 or\
#          len(Bie_Ming_store.objects(Q(uid_or_uname=str(uid_or_nickname)) | Q(bie_ming=str(uid_or_nickname)))) == 0:
#             uid_or_uname_list.append(uid_or_nickname)
#             
#     for uid_or_nickname in set(this_nickname_list) :
#         if len(UserInfo_store.objects(Q(uid_or_uname=str(uid_or_nickname)) | Q(nickname=str(uid_or_nickname)))) == 0 or\
#          len(Bie_Ming_store.objects(Q(uid_or_uname=str(uid_or_nickname)) | Q(bie_ming=str(uid_or_nickname)))) == 0:
#             uid_or_uname_list.append(uid_or_nickname)

    random.shuffle(uid_or_uname_list)
    print(len(uid_or_uname_list))
    return uid_or_uname_list
Example #50
 def get_maps(self):
   with switch_collection(Map, self.map_collection) as M:
     M.ensure_indexes()
     return M.objects
Example #51
par.add_argument("dest")
par.add_argument("--src_uri", type=str, default = "mongodb://127.0.0.1/test")
par.add_argument("--dest_uri", type=str, default = "mongodb://127.0.0.1/test")
par.add_argument("--init", type=str, default = "")
par.add_argument("--query", type=str, default = "{}")
par.add_argument("--key", type=str, default = "_id")
par.add_argument("--verbose", type=int, default = 0)

config = par.parse_args()

query = json.loads(config.query)

sys.path.insert(0, config.module_abspath)
module = importlib.import_module(config.module)
cb = getattr(module, config.function)
init = getattr(module, config.init) if config.init else None

source_db = pymongo.MongoClient(config.src_uri).get_default_database()
source = source_db[config.source]

dest_db = pymongo.MongoClient(config.dest_uri).get_default_database()
dest = dest_db[config.dest]

connectMongoEngine(dest)
hk_colname = source.name + '_' + dest.name
switch_collection(housekeep, hk_colname).__enter__()

# print "DEBUG start worker", os.getpid()
do_chunks(init, cb, source, dest, query, config.key, config.verbose)
# print "DEBUG end worker", os.getpid()
Example #52
 def get_alignments(self):
   with switch_collection(Alignment, self.alignment_collection) as A:
     # A.ensure_indexes()
     return A.objects
Example #53
 def ensure_related_indexes(self):
   with switch_collection(Alignment, self.alignment_collection) as A:
     A.ensure_indexes()
   with switch_collection(Map, self.map_collection) as M:
     M.ensure_indexes()
Example #54
def get_service():
    with switch_collection(Budgetspider, __colbudgetspider) as CBudgetspider:
        for i in CBudgetspider.objects(year="2013", category_one="일반공공행정", category_two="재정금융"):
            print(utf8(i["category_three"]), utf8(i["service"]), i["budget_assigned"])
Example #55
def calc_sum():
    years = ["2014"]
    with switch_collection(Budgetspider, __colbudgetspider) as CBudgetspider:
        for y in years:
            total_assigned = 0
            total_summary = 0
            categories = []
            budgets_assigned = []
            budgets_summary = []
            services = []

            for i in CBudgetspider.objects(year=y):
                name = (i["category_one"], i["category_two"], i["category_three"])
                if name in categories:
                    budgets_assigned[categories.index(name)] += i["budget_assigned"]
                    budgets_summary[categories.index(name)] += i["budget_summary"]
                    total_assigned += i["budget_assigned"]
                    total_summary += i["budget_summary"]
                else:
                    categories.append(name)
                    budgets_assigned.append(i["budget_assigned"])
                    budgets_summary.append(i["budget_summary"])
                    total_assigned += i["budget_assigned"]
                    total_summary += i["budget_summary"]

            with open("output/category_three_" + y + ".tsv", "w") as f:
                f.write(
                    "\t".join(
                        (
                            utf8("category_one"),
                            utf8("category_two"),
                            utf8("category_three"),
                            utf8("assigned"),
                            utf8("summary"),
                            utf8("num_services"),
                        )
                    )
                    + "\n"
                )
            with open("output/services_" + y + ".tsv", "w") as f:
                f.write(
                    "\t".join(
                        (
                            utf8("service"),
                            utf8("category_one"),
                            utf8("category_two"),
                            utf8("category_three"),
                            utf8("assigned"),
                            utf8("summary"),
                        )
                    )
                    + "n"
                )
                f.close()
            for i in range(len(categories)):
                if categories[i] is not None:
                    with open("output/category_three_" + y + ".tsv", "a") as f:
                        num_services = CBudgetspider.objects(
                            year=y,
                            category_one=utf8(categories[i][0]),
                            category_two=utf8(categories[i][1]),
                            category_three=utf8(categories[i][2]),
                        ).count()
                        f.write(
                            "\t".join(
                                (
                                    utf8(categories[i][0]),
                                    utf8(categories[i][1]),
                                    utf8(categories[i][2]),
                                    str(budgets_assigned[i]),
                                    str(budgets_summary[i]),
                                    str(num_services),
                                )
                            )
                            + "\n"
                        )
                    with open("output/services_" + y + ".tsv", "a") as f:

                        for b in CBudgetspider.objects(
                            year=y,
                            category_one=utf8(categories[i][0]),
                            category_two=utf8(categories[i][1]),
                            category_three=utf8(categories[i][2]),
                        ):
                            f.write(
                                "\t".join(
                                    (
                                        utf8(b["service"]),
                                        utf8(categories[i][0]),
                                        utf8(categories[i][1]),
                                        utf8(categories[i][2]),
                                        str(b["budget_assigned"]),
                                        str(b["budget_summary"]),
                                    )
                                )
                                + "\n"
                            )

            print "TOTAL", y, total_assigned, total_summary, len(CBudgetspider.objects(year=y)), "services"