Example #1
 async def write_spend_ops(self, spend_ops: List[dict]):
     async with bulk_write_for(self.db.coin_collection,
                               ordered=False) as db_ops:
         for spend_op in spend_ops:
             db_ops.append(
                 UpdateOne(
                     filter={
                         'mintTxid': spend_op['mintTxid'],
                         'mintIndex': spend_op['mintIndex'],
                     },
                     update={
                         '$set': {
                             'spentTxid': spend_op['spentTxid'],
                             'spentHeight': spend_op['spentHeight'],
                         },
                     },
                 ))
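The bulk_write_for helper used here (and again in Example #24) is not shown in the source. A minimal sketch of what such an async context manager could look like, assuming a Motor collection; the helper's real implementation is an assumption:

from contextlib import asynccontextmanager

@asynccontextmanager
async def bulk_write_for(collection, ordered=True):
    # Collect write operations into a list, then flush them in a single
    # bulk_write call when the block exits normally (assumed behavior).
    ops = []
    yield ops
    if ops:
        await collection.bulk_write(ops, ordered=ordered)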
Example #2
    async def process_cursor(self, skip=0):
        """Process mongodb cursor
        Transform data and move to new database

        Args:
            docs(:obj:`pymongo.Cursor`): documents to be processed
        """
        bulk_write = []
        query = {}
        if self.max_entries == float('inf'):
            limit = 0
        else:
            limit = self.max_entries
        docs = self.from_collection.find(filter=query,
                                         projection={'_id': 0},
                                         no_cursor_timeout=True,
                                         batch_size=500,
                                         skip=skip,
                                         limit=limit)
        i = 0
        async for doc in docs:
            i += 1
            if i == self.max_entries:
                break
            if i != 0 and i % 50 == 0:
                print("Processing file {}".format(i + skip))
                try:
                    await self.to_collection.bulk_write(bulk_write)
                    bulk_write = []
                except BulkWriteError as bwe:
                    pprint(bwe.details)
                    bulk_write = []
            doc["schema_version"] = "2"
            bulk_write.append(
                UpdateOne(
                    {'ec_number': doc["ec_number"]},
                    {'$set': json.loads(json.dumps(doc, ignore_nan=True))},
                    upsert=True))
        if len(bulk_write) != 0:
            try:
                await self.to_collection.bulk_write(bulk_write)
            except BulkWriteError as bwe:
                pprint(bwe.details)
            finally:
                print("Done.")
Example #3
    def update_many(cls, entities):
        bulk_operations = []

        for entity in entities:
            try:
                entity.validate()
                bulk_operations.append(
                    UpdateOne({'_id': entity.id}, {'$set': entity.to_mongo().to_dict()}, upsert=True))

            except ValidationError:
                pass

        result = None
        if bulk_operations:
            result = cls._get_collection() \
                .bulk_write(bulk_operations, ordered=False)
        return result
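A hypothetical usage sketch, assuming Entity is the MongoEngine Document class that defines the classmethod above and entities is a list of its instances:

result = Entity.update_many(entities)
if result is not None:
    # bulk_write returns a BulkWriteResult
    print(result.upserted_count, result.modified_count)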
Example #4
def add_num_sources_to_topics():
    updates = [{
        "numSources": RefTopicLinkSet({
            "toTopic": t.slug,
            "linkType": {"$ne": "mention"}
        }).count(),
        "_id": t._id
    } for t in TopicSet()]
    db.topics.bulk_write([
        UpdateOne({"_id": t['_id']}, {"$set": {"numSources": t['numSources']}})
        for t in updates
    ])
Example #5
def fill_daily_k_at_suspension_days_at_date_one_collection(
        basics, all_dates, collection):
    """
    更新单个数据集的单个日期的数据
    :param basics:
    :param all_dates:
    :param collection:
    :return:
    """
    code_last_trading_daily_dict = dict()
    for date in all_dates:
        update_requests = []
        last_daily_code_set = set(code_last_trading_daily_dict.keys())
        for basic in basics:
            code = basic['code']
            # If the current date is earlier than the listing date, the stock is not listed yet
            if date < basic['timeToMarket']:
                print('Date: %s, %s is not listed yet, listing date: %s' % (date, code, basic['timeToMarket']), flush=True)
            else:
                # Look up that day's data
                daily = DB_CONN[collection].find_one({'code': code, 'date': date, 'index': False})
                if daily is not None:
                    code_last_trading_daily_dict[code] = daily
                    last_daily_code_set.add(code)
                else:
                    if code in last_daily_code_set:
                        last_trading_daily = code_last_trading_daily_dict[code]
                        suspension_daily_doc = {
                            'code': code,
                            'date': date,
                            'close': last_trading_daily['close'],
                            'open': last_trading_daily['close'],
                            'high': last_trading_daily['close'],
                            'low': last_trading_daily['close'],
                            'volume': 0,
                            'is_trading': False
                        }
                        update_requests.append(
                            UpdateOne(
                                {'code': code, 'date': date, 'index': False},
                                {'$set': suspension_daily_doc},
                                upsert=True))
        if len(update_requests) > 0:
            update_result = DB_CONN[collection].bulk_write(update_requests, ordered=False)
            print('Filled suspension data, date: %s, collection: %s, inserted: %4d, updated: %4d' %
                  (date, collection, update_result.upserted_count, update_result.modified_count), flush=True)
Example #6
 def migrate(self):
     collections = [InterfaceProfile._get_collection()]
     for collection in collections:
         bulk = []
         for ip in collection.find({"metrics.is_active": {"$exists": True}}):
             metrics = []
             if "metrics" not in ip:
                 continue  # Not configured
             for metric in ip["metrics"]:
                 metric["enable_periodic"] = bool(metric.get("is_active", False))
                 metric["enable_box"] = False
                 if "is_active" in metric:
                     del metric["is_active"]
                 metrics += [metric]
             bulk += [UpdateOne({"_id": ip["_id"]}, {"$set": {"metrics": metrics}})]
         if bulk:
             collection.bulk_write(bulk)
Example #7
def operacionesBulk():
    operaciones = [
        InsertOne({
            "name": "Manuel Carlos",
            "title": "Compositor",
            "tags": ["Clasica", "Opera"],
            "account": "0xsfdweryshjbkwituqruq67e3ed32a380ccf451c8",
            "birthday": datetime.datetime(1994, 4, 11),
            "inscription": datetime.datetime.utcnow()
        }),
        UpdateOne({'name': 'Juan Pablo'}, {
            '$set': {'name': 'Juana Pabla', 'title': 'QA'},
            '$push': {'tags': 'Matlab'}
        })
    ]
    persons.bulk_write(operaciones)
Example #8
 def update_uplinks(cls, umap):
     if not umap:
         return
     bulk = []
     for o, uplinks in six.iteritems(umap):
         bulk += [UpdateOne({
             "_id": o
         }, {
             "$set": {
                 "uplinks": uplinks
             }
         }, upsert=True)]
     if bulk:
         try:
             ObjectData._get_collection().bulk_write(bulk, ordered=False)
         except BulkWriteError as e:
             logger.error("Bulk write error: '%s'", e.details)
Example #9
def add_datetime_to_documents(collection):
    """
    Function add datetime object to mongoDB's documents
      * input: MongoDB collection
      * return: None
      * log: console - result of modification
    """
    operations = []

    for _id, date in unixtime_to_datetime(collection):
        operations.append(UpdateOne({'_id': _id}, {"$set": {
            'datetime': date
        }}))
    result = collection.bulk_write(operations)
    print(
        f'Applied changes to {len(operations)} documents in collection {collection.name} in database {collection.database.name}'
    )
Example #10
    def update_convert_column_data_type(self, db, table, column, data_type,
                                        provider_connection_id):
        real_data_type = self.check_column_data_type(db, table, column)
        if real_data_type == data_type:
            return True
        else:
            value, name = MongoColumnType.get_type(real_data_type)
            r_value, r_name = MongoColumnType.get_type(data_type)
            if r_name:
                operations = []
                collection = db[table]
                filter_query = {column: {"$exists": True, "$type": value}}
                list_doc = collection.find(filter_query)
                # Cursor.count() was removed in PyMongo 4, so count the matches separately
                if collection.count_documents(filter_query) > 1000:
                    CustomColumnTaskConvert.objects.create(
                        connection_id=provider_connection_id,
                        table_name=table,
                        column_name=column,
                        data_real_type=name,
                        data_type=data_type,
                        current_row=0)
                else:
                    for doc in list_doc:
                        # Rewrite the column value as the requested data type
                        operations.append(
                            UpdateOne(
                                {"_id": doc["_id"]},
                                {
                                    "$set": {
                                        column:
                                        self.convert_column_data_type(
                                            doc.get(column), r_name)
                                    }
                                }))

                        # Flush once every 1000 operations as a batch
                        if len(operations) >= 1000:
                            collection.bulk_write(operations, ordered=False)
                            operations = []

                    if len(operations) > 0:
                        collection.bulk_write(operations, ordered=False)
            return True
Example #11
def update_command_for(data):
    ''' Build the MongoDB update command for a weather-type entry. Its filter
    and update are set according to the entry content, and the returned command
    can be executed against a pymongo database.

    :param data: the dictionary created from the api calls
    :type data: dict
    :return: the UpdateOne command that will be used to find and update documents
    '''
    from pymongo import UpdateOne
    if "weather" in data:
        try:
            filters = {'_id': data['_id']}
            updates = {'$set': {'weather': data}}
        except:
            pass
    if "Weather" in data:
        try:
            filters = {'zipcode': data['Weather'].pop('zipcode'),\
                       'instant': data['Weather'].pop('instant')}
            updates = {'$set': {'weather': data['Weather']}}
        except:
            ### this if for the processing of data in OWM forecasted and observed.
            if "Weather" in data:
                try:
                    data['Weather']['time_to_instant'] \
                    = data['Weather'].pop('reference_time') \
                    - data['reception_time']
                    filters = {'zipcode': data.pop('zipcode'), \
                               'instant': data.pop('instant')}
                    updates = {'$set': {'weather': data['Weather']}}
                except KeyError:
                    print('caught KeyError')
            else:
                try:
                    filters = {'zipcode': data.pop('zipcode'), \
                               'instant': data.pop('instant')}
                    updates = {'$push': {'forecasts': data}}
                except KeyError:
                    print('caught keyerror')
    else:
        filters = {'zipcode': data.pop('zipcode'), \
                   'instant': data.pop('instant')}
        updates = {'$push': {'forecasts': data}}  # $push appends to list
    return UpdateOne(filters, updates, upsert=True)
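Because the function returns a single UpdateOne, a caller would typically batch several of them into one bulk_write. A hypothetical sketch (collection and entries are assumptions):

requests = [update_command_for(entry) for entry in entries]
if requests:
    collection.bulk_write(requests, ordered=False)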
Example #12
def change_duplicate_race_mapping(race_cid: str):
    print("race_cid:%s" % race_cid)
    match_stage = MatchStage({'race_cid': race_cid, 'record_flag': 1})
    project_stage = ProjectStage(**{
        "race_cid": 1,
        "member_cid": 1,
        "race_check_point_cid": 1
    })
    group_stage = GroupStage({'_id': '$member_cid'},
                             count={'$sum': 1},
                             duplicate_list={'$push': '$$ROOT'})
    match_stage_count = MatchStage({'count': {'$gt': 1}})
    project_stage_1 = ProjectStage(**{'duplicate_list': 1})
    duplicate_race_mappings = RaceMapping.sync_aggregate([
        match_stage, project_stage, group_stage, match_stage_count,
        project_stage_1
    ]).to_list(None)
    count = 1
    if len(duplicate_race_mappings) > 0:
        for duplicate_race_mapping in duplicate_race_mappings:
            print('Item %d:' % count)
            print(duplicate_race_mapping.duplicate_list)
            duplicate_record_ids = [
                x._id for x in duplicate_race_mapping.duplicate_list
            ]
            not_need_index = 0  # index of the element whose record_flag stays 1
            for index, value in enumerate(
                    duplicate_race_mapping.duplicate_list):
                if value.race_check_point_cid:
                    not_need_index = index
            duplicate_record_ids.pop(not_need_index)
            print("record_flag需置为0的记录Id:")
            print(duplicate_record_ids)
            update_requests = []
            for object_id in duplicate_record_ids:
                update_requests.append(
                    UpdateOne({'_id': object_id}, {'$set': {
                        'record_flag': 0
                    }}))
            RaceMapping.sync_update_many(update_requests)
            print("-------END:record_flag已置为0---------------")
            count += 1
    else:
        print("-------未找到member_cid重复的记录-------")
    print("-------结束处理活动-------")
Example #13
def main():
    comm = MPI.COMM_WORLD

    ID = comm.Get_rank()

    logging.basicConfig(filename='./logs/log_enrichWithAPI' + str(ID) + '.log',
                        level=logging.INFO)
    logging.info("\n")
    logging.info("Log file created. Program started.")
    logging.info("Reading config files.")

    with open("configMongo.yml", "r") as file:
        cfgMongo = yaml.safe_load(file)

    logging.info("Config files succesfully read.")
    logging.info("Loading Mongo collections.")
    MONGO_HOST = cfgMongo["host"]
    MONGO_DATABASE = cfgMongo["database"]

    clientMongo = pymongo.MongoClient(MONGO_HOST)
    databaseMongo = clientMongo[MONGO_DATABASE]

    collectionName = "scrape"
    scraperCollection = databaseMongo[collectionName]

    collectionName = "API"
    APICollection = databaseMongo[collectionName]

    logging.info("Mongo collections loaded.")

    BATCH_SIZE = round(
        APICollection.count_documents({}) / comm.Get_size() + 0.5)
    cursorAPI = APICollection.find().skip(BATCH_SIZE * ID).limit(BATCH_SIZE)

    logging.info("Preparing to update.")
    # This update enriches scrape documents with the regionCode from the API collection
    upserts = [
        UpdateOne({'id': APIDoc["id"]},
                  {'$set': {
                      "regionCode": APIDoc["regionCode"]
                  }}) for APIDoc in cursorAPI
    ]
    logging.info("Updating documents.")
    if upserts:
        scraperCollection.bulk_write(upserts)
    logging.info("Data saved successfully to Mongo.")
Example #14
    async def process_cursor(self, skip=0):
        """Process mongodb cursor
        Transform data and move to new database

        Args:
            docs(:obj:`pymongo.Cursor`): documents to be processed
        """
        bulk_write = []
        query = {}
        if self.max_entries == float('inf'):
            limit = 0
        else:
            limit = self.max_entries
        docs = self.from_collection.find(filter=query, projection={'_id': 0},
                                        no_cursor_timeout=True, batch_size=100,
                                        skip=skip, limit=limit)
        i = 0
        async for doc in docs:
            i += 1
            if i == self.max_entries:
                break
            if i != 0 and i % 50 == 0:
                print("Processing file {}".format(i + skip))
                try:
                    await self.to_collection.bulk_write(bulk_write)
                    bulk_write = []
                except BulkWriteError as bwe:
                    pprint(bwe.details)
                    bulk_write = []
            doc['schema_version'] = "2"
            for obj in doc["concentrations"]:
                tax_doc = await motor_client_manager.client.get_database(
                    "datanator-test")["taxon_tree"].find_one(filter={"tax_id": obj["ncbi_taxonomy_id"]},
                    projection={'canon_anc_ids': 1, 'canon_anc_names': 1})
                obj["canon_anc_ids"] = tax_doc["canon_anc_ids"]
                obj["canon_anc_names"] = tax_doc["canon_anc_names"]
                obj.pop("last_modified", None)
            bulk_write.append(UpdateOne({'inchikey': doc['inchikey']}, {'$set': json.loads(json.dumps(doc, ignore_nan=True))}, upsert=True))
        if len(bulk_write) != 0:
            try:
                await self.to_collection.bulk_write(bulk_write)
            except BulkWriteError as bwe:
                pprint(bwe.details)
            finally:
                print("Done.")
Example #15
def crawl_basic_at_date(date):
    """
    从Tushare抓取指定日期的股票基本信息
    :param date: 日期
    """
    # 默认推送上一个交易日的数据
    df_basics = ts.get_stock_basics(date)

    # If there is no basic information for the day, do nothing
    if df_basics is None:
        return

    update_requests = []
    codes = set(df_basics.index)
    for code in codes:
        doc = dict(df_basics.loc[code])
        try:
            # Convert 20180101 into the 2018-01-01 format
            time_to_market = datetime \
                .strptime(str(doc['timeToMarket']), '%Y%m%d') \
                .strftime('%Y-%m-%d')

            totals = float(doc['totals'])
            outstanding = float(doc['outstanding'])
            doc.update({
                'code': code,
                'date': date,
                'timeToMarket': time_to_market,
                'outstanding': outstanding,
                'totals': totals
            })

            update_requests.append(
                UpdateOne(
                    {'code': code, 'date': date},
                    {'$set': doc}, upsert=True))
        except Exception:
            print('Exception occurred, stock code: %s, date: %s' % (code, date), flush=True)
            print(doc, flush=True)

    if len(update_requests) > 0:
        update_result = DB_CONN['basic'].bulk_write(update_requests, ordered=False)

        print('Crawled stock basics, date: %s, inserted: %4d, updated: %4d' %
              (date, update_result.upserted_count, update_result.modified_count), flush=True)
Example #16
def add_bulk(urls, user_domain=False):
    # TODO: Tie each analysis run to an ID
    # TODO: Finish the frontend (errors, results, pre-start spinner, analysis completion, the queue)
    bulk = []
    step = 0
    step_max = len(urls) * len(modules_list)
    last_log = oplog.find().sort('$natural', DESCENDING).limit(1).next()
    test_id = last_log['id'] + 1
    print('TESTID: ', test_id)
    for url in urls:
        if url == '':
            continue
        if '://' not in url:
            url = 'http://' + url
        if user_domain:
            user_verdict = "Good"
            document = {
                'url': url,
                'data': test_url(url, db, step, step_max, test_id),
                'datetime': datetime.datetime.now(),
                'user_verdict': user_verdict,
                'user_domain': user_domain
            }
        else:
            document = {
                'url': url,
                'data': test_url(url, db, step, step_max, test_id),
                'datetime': datetime.datetime.now()
            }
        step += len(modules_list)
        print(document['data'])
        bulk.append(document)
    oplog.insert_one({
        'msg': 'Testing completed',
        'step': step,
        'step_max': step_max,
        'id': test_id,
        "status": "Ready"
    })
    upserts = [
        UpdateOne({'url': x['url']}, {'$set': x}, upsert=True) for x in bulk
    ]
    if upserts:
        analyzed_domains.bulk_write(upserts)
    # ml.fit()
    return True
Example #17
 def forwards(self):
     MODELS = [
         "sa_administrativedomain", "sa_authprofile", "sa_managedobject",
         "sa_managedobjectprofile", "sa_terminationgroup"
     ]
     # Update postgresql tables
     for table in MODELS:
         rows = db.execute("SELECT id FROM %s WHERE bi_id IS NULL" % table)
         values = ["(%d, %d)" % (r[0], bi_hash(r[0])) for r in rows]
         while values:
             chunk, values = values[:PG_CHUNK], values[PG_CHUNK:]
             db.execute("""
                 UPDATE %s AS t
                 SET
                   bi_id = c.bi_id
                 FROM (
                   VALUES
                   %s              
                 ) AS c(id, bi_id)
                 WHERE c.id = t.id
                 """ % (table, ",\n".join(chunk)))
     # Update mongodb collections
     mdb = get_db()
     for coll_name in [
             "noc.profiles", "noc.services", "noc.serviceprofiles"
     ]:
         coll = mdb[coll_name]
         updates = []
         for d in coll.find({"bi_id": {"$exists": False}}, {"_id": 1}):
             updates += [
                 UpdateOne(
                     {"_id": d["_id"]},
                     {"$set": {
                         "bi_id": bson.Int64(bi_hash(d["_id"]))
                     }})
             ]
             if len(updates) >= MONGO_CHUNK:
                 coll.bulk_write(updates)
                 updates = []
         if updates:
             coll.bulk_write(updates)
     # Alter bi_id fields and create indexes
     for table in MODELS:
         db.execute("ALTER TABLE %s ALTER bi_id SET NOT NULL" % table)
         db.create_index(table, ["bi_id"], unique=True, db_tablespace="")
Example #18
    def crawl_yjbb(self):
        url = 'http://dcfm.eastmoney.com//em_mutisvcexpandinterface' \
              '/api/js/get?type=YJBB20_YJBB&token=70f12f2f4f091e4' \
              '59a279469fe49eca5&st=reportdate&sr=-1&filter=(scode={1})&' \
              'p=1&ps=100&js={"pages":(tp),"data":(x)}'

        codes = ['000001']

        conn_pool = urllib3.PoolManager()
        for code in codes:
            try:
                response = conn_pool.request('GET', url.replace('{1}', code))

                # Parse the crawl result
                result = json.loads(response.data.decode('UTF-8'))

                reports = result['data']

                update_requests = []
                for report in reports:
                    doc = {
                        'code': report['scode'],
                        'name': report['sname'],
                        'basic_eps': report['basiceps'],
                        'report_date': report['reportdate'][0:10],
                        'announced_date': report['latestnoticedate'][0:10]
                    }

                    update_requests.append(
                        UpdateOne(
                            {
                                'code': doc['code'],
                                'report_date': doc['report_date']
                            }, {'$set': doc},
                            upsert=True))

                if len(update_requests) > 0:
                    update_result = self.db['yjbb'].bulk_write(update_requests,
                                                               ordered=False)
                    print('Fetched financial report, stock: %s, type: earnings report, inserted: %4d, updated: %4d' %
                          (code, update_result.upserted_count,
                           update_result.modified_count),
                          flush=True)
            except Exception:
                print('Error while fetching the earnings report: %s' % code, flush=True)
Example #19
    def sort(id, step_ids):
        steps = step_ids.split(',')
        operations = []
        order = 1

        for step_id in steps:
            operations.append(UpdateOne({
                    '_id': ObjectId(id),
                    'steps._id': ObjectId(step_id)
                },{
                    '$set': {
                        'steps.$.order': order
                    }
                }, upsert=False))

            order += 1

        return mongo.db.templates.bulk_write(operations)
Example #20
def find_all_refs():
    num_good = 0
    num_bad = 0
    total = db.rambi.count_documents({})
    updates = []
    for record in tqdm(db.rambi.find({}), total=total):
        temp_orefs, temp_num_bad = find_refs_in_record(record)
        updates += [
            UpdateOne({"id": record['id']}, {
                "$set": {
                    "refs": [temp_oref.normal() for temp_oref in temp_orefs]
                }
            })
        ]
        num_bad += temp_num_bad
        num_good += len(temp_orefs)
    db.rambi.bulk_write(updates)
    print(num_good, num_bad)
Example #21
 def upsert_entities(self, entities):
     # {"_id":, "label":, "news": []}
     if not entities:
         return
     self["entities"].bulk_write([
         UpdateOne({'_id': e['_id']}, {
             "$set": {
                 "label": e["label"],
                 "text": e["text"]
             },
             "$addToSet": {
                 "news": {"$each": e["news"]}
             }
         }, upsert=True) for e in entities
     ], ordered=False)
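The $addToSet / $each pair appends only the news ids that are not already stored, so re-upserting an entity with an overlapping news list does not create duplicates. A hypothetical call, assuming store is the wrapper object that exposes the method above:

store.upsert_entities([
    {"_id": "e1", "label": "ORG", "text": "ACME", "news": ["n1", "n2"]},
])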
Example #22
 def insert_pages(self, domain, parent, urls, depth):
     pages = []
     for url in urls:
         pages.append({
             "domain": domain,
             "url": url,
             "depth": depth,
             "middleScan": False,
             "indexed": False
         })
     pages_objects = []
     for page in pages:
         pages_objects.append(
             UpdateOne({'url': page['url']}, {"$setOnInsert": page},
                       upsert=True))
     result = self.pages_col.bulk_write(pages_objects)
     self.set_indexed(parent)
     return result
Example #23
 def inference_res_save_in_db(self, collection):
     print("开始从原始评论中进行观点挖掘...")
     size = len(self.comment_docs)
     query_requests = []
     for line_idx, doc in enumerate(self.comment_docs):
         """每条评论按照标点切分成每个短句处理"""
         line = doc['seg'].split(' ')
         cur_doc_res = self.extract_pairs(line)
         if cur_doc_res:
             query_requests.append(
                 UpdateOne(
                     {'_id': doc['_id']},
                     {'$set': {'pairs': cur_doc_res}},
                     upsert=True))
     res = collection.bulk_write(query_requests)
     res_str = 'Database update result: {}; opinion mining finished, covering {}% of comments' \
         .format(res.bulk_api_result, round(res.modified_count / size * 100, 2))
     return res_str
Example #24
    async def write_txs(self, raw_block: BtcBlock, txs: List[BtcTransaction]):
        async with bulk_write_for(self.db.tx_collection,
                                  ordered=False) as db_ops:
            for raw_tx in txs:
                tx = self.accessor.convert_raw_transaction(raw_tx, raw_block)
                row = asrow(tx)
                row['value'] = value2amount(tx.value)

                db_ops.append(
                    UpdateOne(
                        filter={
                            'txid': tx.txid,
                        },
                        update={
                            '$set': row,
                        },
                        upsert=True,
                    ))
Example #25
def pls_import_collection(collection_name, data):
    db = get_mongo()[DB_NAME]
    collection = db.get_collection(collection_name)
    print('Import started')
    try:
        update_el = [
            UpdateOne({'_id': el['_id']}, {'$set': el}, upsert=True)
            for el in data
        ]
        result = collection.bulk_write(update_el)
        print(f"(Import) Values updated, errors: {result.bulk_api_result}")
    except BulkWriteError as bwe:
        print(bwe.details)
        return {'error': str(bwe.details)}
    except BaseException as err:
        print("Unrecognized error in import:\n {}".format(err))
        return {'error': "Unrecognized error in import:\n {}".format(err)}
    return None
Example #26
def set_stats(stats,
              collection=COLLECTION["romania"],
              commit=True,
              **filter_kwargs):
    if not filter_kwargs:
        raise ValueError("filter kwargs required")

    update_params = {
        "filter": filter_kwargs,
        "update": {
            "$set": stats
        },
        "upsert": True,
    }
    if not commit:
        return UpdateOne(**update_params)

    return get_collection(collection).update_one(**update_params)
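With commit=False the function only builds the request, so callers can collect many of them and submit a single bulk_write. A hypothetical sketch (stats_by_county and the county filter field are assumptions):

requests = [
    set_stats(stats, commit=False, county=county)
    for county, stats in stats_by_county.items()
]
if requests:
    get_collection(COLLECTION["romania"]).bulk_write(requests, ordered=False)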
Example #27
def flush_pbc_task(page: int = 1):
    """
    刷新央行公告
    :param page:
    :return:
    """
    job = PbcData()
    connect('jiucai', host='master:17585')
    data = job.get_by_page(page)
    if data:
        # noinspection PyProtectedMember
        bulk = [
            UpdateOne({
                "url": row.url,
            }, {"$set": row._asdict()}, upsert=True) for row in data
        ]
        # noinspection PyProtectedMember
        PbcDocument._get_collection().bulk_write(bulk, ordered=False)
Example #28
 def post(self, liv, chapit, idantifikasyon):
     waves = request.json['waves']
     logger.info(liv)
     logger.info(chapit)
     logger.info(waves)
     vese_yo = [
         UpdateOne(
             {
                 'liv': liv,
                 'chapit': chapit,
                 'vese': index,
                 'idantifikasyon': idantifikasyon
             }, {'$set': {
                 'wave': wave
             }},
             upsert=True) for index, wave in enumerate(waves)
     ]
     MONGO_DB.db.odyo_bib_kreyol.bulk_write(vese_yo, ordered=False)
Example #29
    def update_multiple(self, collection, documents, upsert=False):
        """
		Perform multiple update operations in bulk.

		:param collection: the collection in which the documents are updated.
		:param documents: the documents to be updated.
		:param upsert: set to True to perform an insert if no documents match the filter.
		"""
        operations = []
        for document in documents:
            operations.append(
                UpdateOne({"_id": document["_id"]}, {"$set": document},
                          upsert=upsert))
            if len(operations) == num_bulk_operations:
                collection.bulk_write(operations, ordered=False)
                operations = []
        if len(operations) > 0:
            collection.bulk_write(operations, ordered=False)
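num_bulk_operations is a module-level batch size that is not shown in the snippet. A hypothetical setup and call, where store, db and edited_docs are assumptions:

num_bulk_operations = 1000  # assumed batch size

store.update_multiple(db["articles"], edited_docs, upsert=True)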
Example #30
def update_gene_records(records, col):
    pipeline = list()
    try:
        for doc in records:
            update = deepcopy(doc)
            update.pop('variant_id')
            update.pop('_id')
            if 'snp_id' in doc:
                update.pop('snp_id')
            if 'uniprot_id' in doc:
                update.pop('uniprot_id')
                query = {'uniprot_id': doc['uniprot_id']}

                gene_update = {'$addToSet': {'tissues': update}}
                pipeline.append(UpdateOne(query, gene_update))
        bulk_insert(col, pipeline, 'gene GTEx update')
    except Exception as e:
        logger.error(e)