Example #1
                dien['lng'] = geocode_result[0]['geometry']['location']['lng']
                print(address, dien['lat'], dien['lng'])
                time.sleep(1.5)

        except Exception as e:
            print(e)
            print(address + "無經緯度資料")

    client = pymongo.MongoClient('localhost',
                                 27017,
                                 username='******',
                                 password='******')
    collection = client.rawData.info3Store

    ids = [data["name"] for data in dienList]
    operations = [
        UpdateOne({"_id": idn}, {'$set': data}, upsert=True)
        for idn, data in zip(ids, dienList)
    ]
    try:
        collection.bulk_write(operations, ordered=False)
    except Exception as er:
        print(er)

    mailTo("CrawlerThreeStoreSuccess", ["*****@*****.**"],
           "轉換地址完畢 共{}筆".format(Alln))
except Exception as er:
    errorstr = "出錯" + str(er) + "出錯位置:" + str(sys.exc_info()[2].tb_lineno)
    mailTo("CrawlerThreeStoreError", ["*****@*****.**"],
           "轉換地址失敗" + errorstr)
Example #2
    def _load_institution_with_aggregate_variable(
        self, formatted_sheet_data: FormattedSheetData
    ):
        """
        Load the special institution that is also a composite variable into the db.

        Parameters
        ----------
        formatted_sheet_data: FormattedSheetData
            The data to be loaded into the database. Please see the FormattedSheetData class to view its attributes.
        """
        # Create the institution primary keys
        institution = {
            InstitutionField.spreadsheet_id: formatted_sheet_data.spreadsheet_id,
            InstitutionField.sheet_id: formatted_sheet_data.sheet_id,
            InstitutionField.name: formatted_sheet_data.meta_data.get(
                MetaDataField.variable_heading
            ),
            InstitutionField.country: formatted_sheet_data.meta_data.get(
                InstitutionField.country
            ),
            InstitutionField.category: formatted_sheet_data.meta_data.get(
                InstitutionField.category
            ),
        }
        # Find the specific institution
        institution_doc = self._find_one(db_collection.institutions, institution)
        log.info(
            f"Loaded 1 {db_collection.institutions} "
            f"from sheet: {formatted_sheet_data.sheet_title}"
        )
        # Create a dict with variable heading (category of rights) as keys and the list of rights as the values
        variable_heading_dict = {}
        variable_heading_list = []
        for datum in formatted_sheet_data.formatted_data:
            # Category of a right is the first element in sigla_answers field of datum
            variable_heading = datum.get(CompositeVariableField.sigla_answers)[0].get(
                SiglaAnswerField.answer
            )
            sigla_answers = datum.get(CompositeVariableField.sigla_answers)[1:]
            if variable_heading in variable_heading_dict:
                variable_heading_dict.get(variable_heading).append(sigla_answers)
            else:
                variable_heading_list.append(variable_heading)
                variable_heading_dict[variable_heading] = [sigla_answers]
        # Create the list of variables
        variables = [
            {
                VariableField.institution: institution_doc.get(InstitutionField._id),
                VariableField.name: variable_heading,
                VariableField.heading: variable_heading,
                VariableField.sigla_answer: variable_heading_dict.get(variable_heading),
                VariableField.type: VariableType.aggregate,
                VariableField.variable_index: i,
            }
            for i, variable_heading in enumerate(variable_heading_list)
        ]
        # Create the list of update requests into the db, one for each variable
        update_requests = [
            UpdateOne(
                {
                    VariableField.institution: variable.get(VariableField.institution),
                    VariableField.name: variable.get(VariableField.name),
                    VariableField.variable_index: variable.get(
                        VariableField.variable_index
                    ),
                },
                {"$set": variable},
                upsert=True,
            )
            for variable in variables
        ]
        # Bulk write the variables into the db
        update_requests_results = self._db.get_collection(
            db_collection.variables
        ).bulk_write(update_requests)

        log.info(
            f"Loaded {update_requests_results.upserted_count} {db_collection.variables} "
            f"from sheet: {formatted_sheet_data.sheet_title}"
        )
Example #3
movies_to_migrate = []
for doc in cursor:
    doc_id = doc.get('_id')
    lastupdated = doc.get('lastupdated', None)
    movies_to_migrate.append({
        "doc_id": ObjectId(doc_id),
        "lastupdated": parser.parse(lastupdated)
    })

print(f"{len(movies_to_migrate)} documents to migrate")

try:
    # TODO: Complete the UpdateOne statement below
    # build the UpdateOne so it updates the "lastupdated" field to contain
    # the new ISODate() type
    bulk_updates = [
        UpdateOne({"_id": movie.get("doc_id")},
                  {"$set": {
                      "lastupdated": movie.get("lastupdated")
                  }}) for movie in movies_to_migrate
    ]

    # here's where the bulk operation is sent to MongoDB
    bulk_results = mflix.movies.bulk_write(bulk_updates)
    print(f"{bulk_results.modified_count} documents updated")

except InvalidOperation:
    print("no updates necessary")
except Exception as e:
    print(str(e))
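Example #3 assumes `mflix`, `cursor`, and `parser` were set up earlier. A minimal sketch of that setup, assuming the goal is to migrate movies whose `lastupdated` field is still stored as a string (the connection string and database name are placeholders):

from bson import ObjectId
from dateutil import parser
from pymongo import MongoClient, UpdateOne
from pymongo.errors import InvalidOperation

client = MongoClient("mongodb://localhost:27017")  # placeholder URI
mflix = client["mflix"]                            # placeholder database name

# Only fetch movies whose lastupdated value is still a string
cursor = mflix.movies.find(
    {"lastupdated": {"$exists": True, "$type": "string"}},
    {"lastupdated": 1},
)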
Example #4
counter = tqdm(desc="Writing title to Mongo ", unit="piece")

bkh = BulkHelper(100000, lambda e: collection.bulk_write(e))

with open(r"rsc/CID-Title", "r", encoding="latin-1") as f:
    try:
        while 1:
            counter.update()
            new_line = next(f).strip("\n")
            id_string, title = new_line.split("\t")
            uid = make_uid(src_name, id_string)
            bkh.append(
                UpdateOne({"uid": uid},
                          {"$set": {
                              "title": title,
                              "_sync": False
                          }},
                          upsert=True))
    except StopIteration:
        pass

bkh.close()

counter = tqdm(desc="Writing SMILES and formulas to Mongo ", unit="piece")

bkh = BulkHelper(100000, lambda e: collection.bulk_write(e))

with open(r"rsc/CID-SMILES", "r", encoding="latin-1") as f:
    try:
        while 1:
            counter.update()
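Example #4 relies on a `BulkHelper` and a `make_uid` helper that are not shown. A hedged sketch of what such a batching helper might look like, not the project's actual implementation: it buffers requests and hands them to the supplied callback whenever the batch size is reached, with close() flushing any remainder.

class BulkHelper:
    # Buffer write requests and flush them in batches through a callback
    def __init__(self, batch_size, flush_fn):
        self.batch_size = batch_size
        self.flush_fn = flush_fn  # e.g. lambda ops: collection.bulk_write(ops)
        self._buffer = []

    def append(self, request):
        self._buffer.append(request)
        if len(self._buffer) >= self.batch_size:
            self.close()

    def close(self):
        if self._buffer:
            self.flush_fn(self._buffer)
            self._buffer = []

def make_uid(src_name, id_string):
    return f"{src_name}:{id_string}"  # hypothetical uid scheme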
Example #5
from pymongo import UpdateOne
from pymongo.errors import BulkWriteError

uri = os.getenv("MONGODB_URI")
# uri = "mongodb://localhost:27017/test" # Use this to test on local database.
client = MongoClient(uri)
database_name = pymongo.uri_parser.parse_uri(uri)['database']
db = client[database_name]

with open(os.path.join(os.path.dirname(__file__),
                       'consolidated_entries.json')) as f:
    updates = json.load(f)

bulk_updates = []
for update in updates:
    bulk_updates.append(
        UpdateOne(
            {
                "_id": ObjectId(update["_id"]["$oid"]),
                "_revisionIndex": update["_revisionIndex"]
            }, {
                "$set": {
                    "parsed_notes": update["parsed_notes"],
                    "consolidated_notes": update["consolidated_notes"]
                }
            }))

try:
    db.entries.bulk_write(bulk_updates, ordered=False)
except BulkWriteError as bwe:
    print(bwe.details)
Example #6
def add_point(uid: str, point: Point):
    return UpdateOne({"_id": uid},
                     {"$set": make_nested_dict(prepare_pt(point), 'points')},
                     upsert=True)
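`prepare_pt` and `make_nested_dict` in Example #6 are project helpers that are not shown. Given that the result feeds `$set`, one plausible reading is that `make_nested_dict` prefixes each key with a parent path so only sub-fields of `points` are touched; a hedged sketch:

from pymongo import UpdateOne

def make_nested_dict(d, prefix):
    # Prefix every key with "<prefix>." so $set updates only those sub-fields
    return {f"{prefix}.{key}": value for key, value in d.items()}

# Produces UpdateOne({"_id": "u1"}, {"$set": {"points.home": {"x": 1, "y": 2}}}, upsert=True)
op = UpdateOne({"_id": "u1"},
               {"$set": make_nested_dict({"home": {"x": 1, "y": 2}}, "points")},
               upsert=True)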
Example #7
def fill_daily_k_at_suspension_days_at_date_one_collection(
        basics, all_dates, collection):
    """
    Update per-date data for a single collection.
    :param basics: stock basics (listing information)
    :param all_dates: dates to process
    :param collection: target collection name
    :return:
    """
    code_last_trading_daily_dict = dict()
    total = len(all_dates)
    for i, date in enumerate(all_dates):

        _tic = time.process_time()

        update_requests = []
        last_daily_code_set = set(code_last_trading_daily_dict.keys())
        for basic in basics:
            code = basic['code']
            # If the date is before the listing date (timeToMarket), the stock is not listed yet; skip it
            if date < basic['timeToMarket']:
                #                print('日期:%s, %s 还没上市,上市日期: %s' % (date, code, basic['timeToMarket']), flush=True)
                pass
            else:
                # Look up the record for this date
                daily = DB_CONN[collection].find_one({
                    'code': code,
                    'date': date,
                    'index': False
                })
                if daily is not None:
                    code_last_trading_daily_dict[code] = daily
                    last_daily_code_set.add(code)
                else:
                    if code in last_daily_code_set:
                        last_trading_daily = code_last_trading_daily_dict[code]
                        suspension_daily_doc = {
                            'code': code,
                            'date': date,
                            'close': last_trading_daily['close'],
                            'open': last_trading_daily['close'],
                            'high': last_trading_daily['close'],
                            'low': last_trading_daily['close'],
                            'volume': 0,
                            'is_trading': False
                        }
                        update_requests.append(
                            UpdateOne(
                                {
                                    'code': code,
                                    'date': date,
                                    'index': False
                                }, {'$set': suspension_daily_doc},
                                upsert=True))

        _toc = time.process_time()
        expect_time = (_toc - _tic) * (total - i - 1)
        if len(update_requests) > 0:
            update_result = DB_CONN[collection].bulk_write(update_requests,
                                                           ordered=False)
            print(
                '填充停牌数据进度: (%s/%s), 日期:%s,数据集:%s,插入:%4d条,更新:%4d条, 预计还需要%.2fs' %
                (i + 1, total, date, collection, update_result.upserted_count,
                 update_result.modified_count, expect_time),
                flush=True)
Example #8
 def update_many(self, collection_name, docs: list):
     with self._client:
         self._db[collection_name].bulk_write([
             UpdateOne({'id': doc["id"]}, {'$set': doc}) for doc in docs
             if "id" in doc
         ])
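A usage sketch for Example #8, assuming the enclosing class holds a `MongoClient` in `self._client` and a database handle in `self._db` (all names below are placeholders). Note that `with self._client:` closes the client when the block exits, so in recent PyMongo versions the instance cannot be reused for a second call.

from pymongo import MongoClient, UpdateOne

class MongoStore:
    def __init__(self, uri="mongodb://localhost:27017", db_name="appdata"):  # placeholders
        self._client = MongoClient(uri)
        self._db = self._client[db_name]

    def update_many(self, collection_name, docs: list):
        with self._client:
            self._db[collection_name].bulk_write([
                UpdateOne({'id': doc["id"]}, {'$set': doc}) for doc in docs
                if "id" in doc
            ])

MongoStore().update_many("users", [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}])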
Example #9
    def compute(self, begin_date, end_date):
        """
        Compute signals within the given date range.
        :param begin_date: start date
        :param end_date: end date
        """
        all_codes = get_all_codes()

        dm = DataModule()

        N = 20
        k = 2

        for code in all_codes:
            try:
                df_daily = dm.get_k_data(code,
                                         autype='hfq',
                                         begin_date=begin_date,
                                         end_date=end_date)

                # Compute MID (the middle band); this runs after the close, so use the day's close
                df_daily['MID'] = df_daily['close'].rolling(N).mean()
                # Compute the 20-day standard deviation
                df_daily['std'] = df_daily['close'].rolling(N).std()
                # Compute UP (the upper band)
                df_daily['UP'] = df_daily['MID'] + k * df_daily['std']
                # Compute DOWN (the lower band)
                df_daily['DOWN'] = df_daily['MID'] - k * df_daily['std']

                # Use the date as the index
                df_daily.set_index(['date'], inplace=True)

                # Shift the upper and middle bands forward by one bar
                shifted_up = df_daily['UP'].shift(1)
                shifted_middle = df_daily['MID'].shift(1)

                # How far the close breaks above or below the middle band, as a fraction of the upper-to-middle band width
                ref_line = (df_daily['close'] -
                            shifted_middle) / (shifted_up - shifted_middle)

                ref_prev = ref_line.shift(1)

                # Minimum value within the time window
                min_val = ref_line.rolling(10).min()

                # Maximum value before the lowest point within the time window
                max_leading_value = ref_line.rolling(10).apply(
                    lambda vec: vec[:np.argmin(vec) + 1].max().astype(float),
                    raw=True)

                # Range within which the middle band is considered to act as support
                delta = 0.15

                # Check whether there is a middle-band-support rebound signal. Requirements:
                # the maximum before the window's lowest point is above delta while the absolute value of the
                # minimum is within delta, i.e. the ratio line crossed down through the threshold;
                # the current value is back above the threshold, i.e. it crossed up again from the low;
                # and the previous day was still below the threshold, so the upward cross completed on the current trading day
                m_rebound_mask = (abs(min_val) <= delta) & (ref_line > delta) & (ref_prev <= delta) & \
                                 (max_leading_value > delta)

                # Save the signals to the database
                update_requests = []
                df_daily['m_rebound_mask'] = m_rebound_mask
                df_daily = df_daily[df_daily['m_rebound_mask']]
                for date in df_daily.index:
                    doc = {'code': code, 'date': date, 'signal': 'mid_rebound'}
                    update_requests.append(
                        UpdateOne(doc, {'$set': doc}, upsert=True))

                if len(update_requests) > 0:
                    update_result = self.collection.bulk_write(update_requests,
                                                               ordered=False)
                    print('%s, upserted: %4d, modified: %4d' %
                          (code, update_result.upserted_count,
                           update_result.modified_count),
                          flush=True)
            except:
                traceback.print_exc()
Example #10
    async def post_process(self):
        """
        url: POST /api/plan_management/plan/status_update
        Structure of the data the monitoring app sends to the plan-management app:
        [
            {
                "plan_no": "A181120203-00",
                "status": 2, // 2 = in progress, 3 = paused, 4 = finished
                "progress_detail":  // qualified/unqualified counts are sent only when the status is paused or finished; the field is absent for in-progress
                {
                    "qualified_count": 1000,
                    "unqualified_count": 2
                }
            }
        ]
        :return:
        {
            "code":"success",
            "info": "",
            "data": null
        }
        """
        req_data_list = json.loads(self.request.body)
        try:
            plan_collection = get_plan_db_collection()
            # Re-index into a dict for easier lookup
            status_data_dict = dict()
            for req_data in req_data_list:
                plan_no = req_data['plan_no']
                status_data_dict[plan_no] = req_data

            task_no_list = [
                status_data['plan_no'] for status_data in req_data_list
            ]
            query = {'task_no': {'$in': task_no_list}}
            update_plan_list = []
            async for document in plan_collection.find(query):
                document_id = document['_id']
                task_no = document['task_no']
                status_data = status_data_dict[task_no]
                updated_status = status_data['status']
                update_clause = {'plan_status': updated_status}
                if updated_status == PlanStatusType.in_progress.value:
                    real_start_date = document['real_start_date']
                    if not real_start_date:
                        # If the plan has just started, set the actual start date to today
                        update_clause['real_start_date'] = str(
                            datetime.now().date())
                if 'progress_detail' in status_data:
                    # When the status is paused or finished, qualified and unqualified counts are included
                    qualified_count = status_data['progress_detail'][
                        'qualified_count']
                    unqualified_count = status_data['progress_detail'][
                        'unqualified_count']
                    update_clause['qualified_count'] = qualified_count
                    update_clause['unqualified_count'] = unqualified_count

                update_data = UpdateOne({'_id': document_id},
                                        {'$set': update_clause})
                update_plan_list.append(update_data)
            if update_plan_list:
                # Bulk-update the plans
                await plan_collection.bulk_write(update_plan_list)
                self.send_response_data(MesCode.success, None, '')
                get_logger().info('更新计划状态成功: {}'.format(req_data_list))
        except Exception as e:
            log_exception(e, '更新计划状态数据失败')
            self.send_response_data(MesCode.fail, None,
                                    '更新计划状态数据失败: {}'.format(req_data_list))
Example #11
    def fill_daily_k_at_suspension_days_at_date_one_collection(self,
            basics, all_dates, collection):
        """
        Update per-date data for a single collection.
        :param basics: basics data from the previous trading day
        :param all_dates: dates to process, in ascending order
        :param collection: target collection name
        :return:
        """
        code_last_trading_daily_dict = dict()
        for date in all_dates:  # iterate in ascending order
            update_requests = []
            last_daily_code_set = set(code_last_trading_daily_dict.keys())
            for basic in basics:  # iterate over the stocks
                code = basic['code']
                # If the date is before the listing date, the stock is not listed yet
                if date < basic['listDate']:
                    print('日期:%s, %s 还没上市,上市日期: %s' % (date, code, basic['listDate']), flush=True)
                else:
                    # Look up the record for this date
                    daily = DB_CONN[collection].find_one({'code': code, 'date': date})
                    if daily is not None:
                        code_last_trading_daily_dict[code] = daily
                        last_daily_code_set.add(code)
                        trading_daily_doc = {
                            'is_trading': True
                        }
                        update_requests.append(
                            UpdateOne(
                                {'code': code, 'date': date},
                                {'$set': trading_daily_doc},
                                upsert=True))

                        stock_node = "Stock" + date
                        query = "match(p:%s) where p.stock_code='%s' set p.is_trading = %r" % (stock_node, code, trading_daily_doc['is_trading'])
                        try:
                            GRAPH_DB_CONN.run(query)
                        except Exception as e:
                            print(e)
                    else:
                        suspension_daily_doc = {}
                        print(last_daily_code_set)
                        if code in last_daily_code_set:  # suspension begins on this date
                            print("begin to tingpai")
                            last_trading_daily = code_last_trading_daily_dict[code]
                            suspension_daily_doc = {
                                'code': code,
                                'date': date,
                                'close': last_trading_daily['close'],
                                'open': last_trading_daily['close'],
                                'high': last_trading_daily['close'],
                                'low': last_trading_daily['close'],
                                'vol': 0,
                                'change': 0,
                                'pct_chg': 0,
                                'amount': 0,
                                'is_trading': False
                            }
                            update_requests.append(
                                UpdateOne(
                                    {'code': code, 'date': date},
                                    {'$set': suspension_daily_doc},
                                    upsert=True))

                        print(suspension_daily_doc)
                        if len(suspension_daily_doc) > 0 and collection == 'daily':
                            stock_node = "Stock" + date
                            query = "match(p:%s) where p.stock_code='%s' set p.close = %f, p.open = %f, p.high = %f, p.low = %f," \
                                    "p.is_trading = %r" % (
                            stock_node, code, suspension_daily_doc['close'], suspension_daily_doc['open'], suspension_daily_doc['high'],
                            suspension_daily_doc['low'], suspension_daily_doc['is_trading'])
                            try:
                                GRAPH_DB_CONN.run(query)
                            except Exception as e:
                                print(e)

            if len(update_requests) > 0:
                update_result = DB_CONN[collection].bulk_write(update_requests, ordered=False)
                print('填充停牌数据,日期:%s,数据集:%s,插入:%4d条,更新:%4d条' %
                      (date, collection, update_result.upserted_count, update_result.modified_count), flush=True)
Example #12
 def cache_start(self, _, data):
     yield "shards", UpdateOne({"_id": "meta"},
                               {"$set": {
                                   "_id": "meta",
                                   **data
                               }})
Example #13
 def cache_guild_role_create(self, _, data, *, upsert=True):
     role = data["role"]
     role["_id"] = role["id"]
     role["guild_id"] = data["guild_id"]
     yield "roles", UpdateOne({"_id": role["id"]}, {"$set": role},
                              upsert=upsert)
Example #14
 def cache_channel_create(self, _, data, *, upsert=True):
     data["_id"] = data["id"]
     yield "channels", UpdateOne({"_id": data["id"]}, {"$set": data},
                                 upsert=upsert)
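Examples #12-#14 are generator methods that yield (collection_name, UpdateOne) pairs instead of writing directly. A hedged sketch of a consumer that groups the yielded operations by collection and issues one bulk_write per collection (the database and event payloads are assumptions):

from collections import defaultdict
from pymongo import MongoClient

def flush_cache_ops(db, ops):
    # ops: iterable of (collection_name, request) pairs yielded by the cache_* methods
    grouped = defaultdict(list)
    for collection_name, request in ops:
        grouped[collection_name].append(request)
    for collection_name, requests in grouped.items():
        db[collection_name].bulk_write(requests, ordered=False)

# Hypothetical usage:
# db = MongoClient()["cache"]
# flush_cache_ops(db, cache.cache_channel_create(None, {"id": "1", "name": "general"}))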
Example #15
def BulkSetData(_id, gvkey):

    return UpdateOne({"_id":ObjectId(_id)},{"$set":{"gvkey":gvkey}})
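`BulkSetData` only builds the request; the caller still collects the requests and submits them with `bulk_write`. A short usage sketch building on the function above (the collection and the sample (_id, gvkey) pair are placeholders):

from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")   # placeholder URI
collection = client["research"]["companies"]        # placeholder collection

pairs = [("64b7f0c2e13823a1f0a1b2c3", "001690")]    # sample (_id, gvkey) pairs
requests = [BulkSetData(_id, gvkey) for _id, gvkey in pairs]
if requests:
    result = collection.bulk_write(requests, ordered=False)
    print(result.modified_count, "documents updated")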
Example #16
 def migrate(self):
     bulk = [
         UpdateOne(
             {"_id": bson.ObjectId("5b6d6819d706360001a0b716")},
             {
                 "$set": {
                     "name": "Group",
                     "uuid": UUID("8874518c-effd-41fe-81bf-d67f1519ccf2"),
                     "description": "Grouping element",
                     "single_service": False,
                     "single_client": False,
                     "allow_children": True,
                     "bi_id": Int64(3859384814270643576),
                 }
             },
             upsert=True,
         ),
         UpdateOne(
             {"_id": bson.ObjectId("5b6d6c9fd706360001f5c053")},
             {
                 "$set": {
                     "name": "Network | Controller",
                     "uuid": UUID("bcf7ad57-81a4-4da0-8e6d-e429c9e21532"),
                     "description": "Controller - CPE relation",
                     "service_model": "sa.ManagedObject",
                     "client_model": "sa.ManagedObject",
                     "single_service": False,
                     "single_client": True,
                     "allow_children": False,
                     "bi_id": Int64(2204453448135692504),
                 }
             },
             upsert=True,
         ),
         UpdateOne(
             {"_id": bson.ObjectId("5b6dbbefd70636000170b980")},
             {
                 "$set": {
                     "name": "Object Group",
                     "uuid": UUID("f4c6d51d-d597-4183-918e-23efd748fd12"),
                     "description": "Arbitrary group of Managed Objects",
                     "service_model": "sa.ManagedObject",
                     "single_service": False,
                     "single_client": False,
                     "allow_children": False,
                     "bi_id": Int64(4062440225872880146),
                 }
             },
             upsert=True,
         ),
         UpdateOne(
             {"_id": bson.ObjectId("5b6d6be1d706360001f5c04e")},
             {
                 "$set": {
                     "name": "Network | IPoE Termination",
                     "uuid": UUID("ef42d9fe-d217-4754-b628-a1f71f6159da"),
                     "description":
                     "IPoE Temination (access equipment -> BRAS)",
                     "service_model": "sa.ManagedObject",
                     "client_model": "sa.ManagedObject",
                     "single_service": False,
                     "single_client": False,
                     "allow_children": False,
                     "bi_id": Int64(4546441601898809637),
                 }
             },
             upsert=True,
         ),
         UpdateOne(
             {"_id": bson.ObjectId("5b6d6beed706360001f5c04f")},
             {
                 "$set": {
                     "name": "Network | PPPoE Termination",
                     "uuid": UUID("a8ddcd67-d8c4-471d-9a9b-9f4749e09011"),
                     "description":
                     "PPPoE Temination (access equipment -> BRAS)",
                     "service_model": "sa.ManagedObject",
                     "client_model": "sa.ManagedObject",
                     "single_service": False,
                     "single_client": False,
                     "allow_children": False,
                     "bi_id": Int64(3384545658468911814),
                 }
             },
             upsert=True,
         ),
         UpdateOne(
             {"_id": bson.ObjectId("5b6d6c56d706360001f5c052")},
             {
                 "$set": {
                     "name": "Network | PPTP Termination",
                     "uuid": UUID("8ce08fc8-a5b1-448d-9c2c-ac1419ad9816"),
                     "description":
                     "PPTP Temination (access equipment -> BRAS)",
                     "service_model": "sa.ManagedObject",
                     "client_model": "sa.ManagedObject",
                     "single_service": False,
                     "single_client": False,
                     "allow_children": False,
                     "bi_id": Int64(2085768785416150430),
                 }
             },
             upsert=True,
         ),
         UpdateOne(
             {"_id": bson.ObjectId("5b6e785ed70636000170b9a6")},
             {
                 "$set": {
                     "name": "Voice | SIP Termination",
                     "uuid": UUID("3e15a3ea-f4c1-49a1-a183-d61dd79531c2"),
                     "description":
                     "SIP Temination (media gateway -> softswitch)",
                     "service_model": "sa.ManagedObject",
                     "client_model": "sa.ManagedObject",
                     "single_service": False,
                     "single_client": False,
                     "allow_children": False,
                     "bi_id": Int64(4632306658633376591),
                 }
             },
             upsert=True,
         ),
     ]
     self.mongo_db.technologies.bulk_write(bulk)
Example #17
def create0(link):
    '''Populates mongoDB database with our index'''
    print("############### Starting storage #####################")

    ##    root = 'C:/Users/richa/Desktop/webpages/WEBPAGES_RAW'
    root = "C:/Users/spsan/OneDrive/Documents/UCI 2018-19/CS 121/Project 3/webpages/WEBPAGES_RAW"
    counter = 0
    bulk_update_list = []
    # https://www.tutorialspoint.com/python3/os_walk.htm
    for root, dirs, files in sorted(os.walk(root)):
        for f in files:
            if not f.endswith('.json') and not f.endswith(
                    '.tsv') and not f.endswith('.DS_Store'):
                path = os.path.join(root, f)
                ##                doc_id = path[33:] # Richard's path
                doc_id = path[85:]  # Sandy's path
                tokenDict = preprocess.tokenizer(path)
                #========== insert here ========
                for token, freq in tokenDict.items():
                    append_list = {"token": token}
                    append_list2 = {
                        "$set": {
                            "posting" + str(counter): {
                                "docID": doc_id,
                                "freq": freq,
                                "tf-idf": "None"
                            }
                        }
                    }
                    bulk_update_list.append(append_list)
                    bulk_update_list.append(append_list2)
                    counter += 1
                    print(counter)
                    if (counter % 10 == 0):
                        link.bulk_write([
                            UpdateOne(bulk_update_list[0], bulk_update_list[1],
                                      True),
                            UpdateOne(bulk_update_list[2], bulk_update_list[3],
                                      True),
                            UpdateOne(bulk_update_list[4], bulk_update_list[5],
                                      True),
                            UpdateOne(bulk_update_list[6], bulk_update_list[7],
                                      True),
                            UpdateOne(bulk_update_list[8], bulk_update_list[9],
                                      True),
                            UpdateOne(bulk_update_list[10],
                                      bulk_update_list[11], True),
                            UpdateOne(bulk_update_list[12],
                                      bulk_update_list[13], True),
                            UpdateOne(bulk_update_list[14],
                                      bulk_update_list[15], True),
                            UpdateOne(bulk_update_list[16],
                                      bulk_update_list[17], True),
                            UpdateOne(bulk_update_list[18],
                                      bulk_update_list[19], True)
                        ])
                        bulk_update_list = []

                #=========== end here==========

    print("############### Finished storage #####################")
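The buffer in Example #17 interleaves filter and update documents in one flat list and hard-codes ten UpdateOne calls per flush. A sketch of an equivalent but simpler batching approach, keeping each filter paired with its update (same assumed behavior: upsert on the token filter):

from pymongo import UpdateOne

BATCH_SIZE = 10
pending = []  # list of (filter, update) pairs

def maybe_flush(link, pending, force=False):
    # Send buffered postings to MongoDB once BATCH_SIZE pairs have accumulated
    if pending and (force or len(pending) >= BATCH_SIZE):
        link.bulk_write([UpdateOne(flt, upd, upsert=True) for flt, upd in pending])
        pending.clear()

# Inside the token loop, instead of two separate appends:
#   pending.append(({"token": token}, {"$set": {"posting" + str(counter): posting}}))
#   maybe_flush(link, pending)
# ...and once the walk finishes: maybe_flush(link, pending, force=True)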
Example #18
File: common.py  Project: olethanh/pyaleph
async def incoming(message,
                   chain_name=None,
                   tx_hash=None,
                   height=None,
                   seen_ids=None,
                   check_message=False,
                   retrying=False,
                   bulk_operation=False):
    """ New incoming message from underlying chain.

    For regular messages it will be marked as confirmed
    if existing in database, created if not.
    """
    hash = message['item_hash']
    sender = message['sender']
    chain = chain_name
    ids_key = (hash, sender, chain)

    if chain_name and tx_hash and height and seen_ids is not None:
        if ids_key in seen_ids.keys():
            if height > seen_ids[ids_key]:
                return True

    filters = {
        'item_hash': hash,
        'chain': message['chain'],
        'sender': message['sender'],
        'type': message['type']
    }
    existing = await Message.collection.find_one(filters,
                                                 projection={
                                                     'confirmed': 1,
                                                     'confirmations': 1,
                                                     'time': 1,
                                                     'signature': 1
                                                 })

    if check_message:
        if existing is None or (existing['signature'] != message['signature']):
            # check/sanitize the message if needed
            message = await check_message_fn(message,
                                             from_chain=(chain_name
                                                         is not None))

    if message is None:
        return True  # message handled.

    if retrying:
        LOGGER.debug("(Re)trying %s." % hash)
    else:
        LOGGER.info("Incoming %s." % hash)

    # we set the incoming chain as default for signature
    message['chain'] = message.get('chain', chain_name)

    # if existing is None:
    #     # TODO: verify if search key is ok. do we need an unique key for messages?
    #     existing = await Message.collection.find_one(
    #         filters, projection={'confirmed': 1, 'confirmations': 1, 'time': 1})

    if chain_name and tx_hash and height:
        # We are getting a confirmation here
        new_values = await mark_confirmed_data(chain_name, tx_hash, height)

        updates = {
            '$set': {
                'confirmed': True,
            },
            '$min': {
                'time': message['time']
            },
            '$addToSet': {
                'confirmations': new_values['confirmations'][0]
            }
        }
    else:
        updates = {
            '$max': {
                'confirmed': False,
            },
            '$min': {
                'time': message['time']
            }
        }

    # new_values = {'confirmed': False}  # this should be our default.
    should_commit = False
    if existing:
        if seen_ids is not None:
            if ids_key in seen_ids.keys():
                if height > seen_ids[ids_key]:
                    return True
                else:
                    seen_ids[ids_key] = height
            else:
                seen_ids[ids_key] = height

        # THIS CODE SHOULD BE HERE...
        # But, if a race condition appeared, we might have the message twice.
        # if (existing['confirmed'] and
        #         chain_name in [c['chain'] for c in existing['confirmations']]):
        #     return

        LOGGER.debug("Updating %s." % hash)

        if chain_name and tx_hash and height:
            # we need to update messages adding the confirmation
            #await Message.collection.update_many(filters, updates)
            should_commit = True

    else:
        # if not (chain_name and tx_hash and height):
        #     new_values = {'confirmed': False}  # this should be our default.

        try:
            content, size = await get_message_content(message)
        except Exception:
            LOGGER.exception("Can't get content of object %r" % hash)
            content = None

        if content is None:
            LOGGER.info("Can't get content of object %r, retrying later." %
                        hash)
            if not retrying:
                await PendingMessage.collection.insert_one({
                    'message':
                    message,
                    'source':
                    dict(
                        chain_name=chain_name,
                        tx_hash=tx_hash,
                        height=height,
                        check_message=check_message  # should we store this?
                    )
                })
            return

        if content == -1:
            LOGGER.warning("Can't get content of object %r, won't retry." %
                           hash)
            return -1

        if content.get('address', None) is None:
            content['address'] = message['sender']

        if content.get('time', None) is None:
            content['time'] = message['time']

        # warning: those handlers can modify message and content in place
        # and return a status. None has to be retried, -1 is discarded, True is
        # handled and kept.
        # TODO: change this, it's messy.
        try:
            handling_result = await handle_incoming_message(message, content)
        except Exception:
            LOGGER.exception("Error using the message type handler")
            handling_result = None

        if handling_result is None:
            LOGGER.debug("Message type handler has failed, retrying later.")
            if not retrying:
                await PendingMessage.collection.insert_one({
                    'message':
                    message,
                    'source':
                    dict(
                        chain_name=chain_name,
                        tx_hash=tx_hash,
                        height=height,
                        check_message=check_message  # should we store this?
                    )
                })
            return

        if handling_result != True:
            LOGGER.warning("Message type handler has failed permanently for "
                           "%r, won't retry." % hash)
            return -1

        if not await check_sender_authorization(message, content):
            LOGGER.warn("Invalid sender for %s" % hash)
            return True  # message handled.

        if seen_ids is not None:
            if ids_key in seen_ids.keys():
                if height > seen_ids[ids_key]:
                    return True
                else:
                    seen_ids[ids_key] = height
            else:
                seen_ids[ids_key] = height

        LOGGER.debug("New message to store for %s." % hash)
        # message.update(new_values)
        updates['$set'] = {
            'content': content,
            'size': size,
            'item_content': message.get('item_content'),
            'item_type': message.get('item_type'),
            'channel': message.get('channel'),
            'signature': message.get('signature')
        }
        should_commit = True
        #await Message.collection.insert_one(message)

        # since it's on-chain, we need to keep that content.
        # if message['item_type'] == 'ipfs' and app['config'].ipfs.enabled.value:
        #     LOGGER.debug("Pining hash %s" % hash)
        # await pin_hash(hash)

    if should_commit:
        action = UpdateOne(filters, updates, upsert=True)
        if not bulk_operation:
            await Message.collection.bulk_write([action])
        else:
            return action
    return True  # message handled.
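When bulk_operation=True and there is something to commit, incoming() returns the UpdateOne instead of writing it. A hedged sketch of how a caller might batch those return values across many messages before a single bulk_write (this wrapper is not part of the project):

from pymongo import UpdateOne

async def incoming_bulk(messages, chain_name=None, tx_hash=None, height=None):
    # Collect per-message UpdateOne actions and write them in one batch
    actions = []
    for message in messages:
        result = await incoming(message, chain_name=chain_name, tx_hash=tx_hash,
                                height=height, bulk_operation=True)
        if isinstance(result, UpdateOne):  # True / -1 / None mean handled, discard, or retry later
            actions.append(result)
    if actions:
        await Message.collection.bulk_write(actions, ordered=False)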
Example #19
def add_fields(uid: str, fields: Mapping):
    return UpdateOne({"_id": uid},
                     {"$set": make_nested_dict(fields, 'fields')},
                     upsert=True)
Example #20
    lastupdated = doc.get('lastupdated', None)
    movies_to_migrate.append({
        "doc_id": ObjectId(doc_id),
        "lastupdated": parser.parse(lastupdated)
    })

print(f"{len(movies_to_migrate)} documents to migrate")

try:
    # TODO: Complete the UpdateOne statement below
    # build the UpdateOne so it updates the "lastupdated" field to contain
    # the new ISODate() type
    bulk_updates = [UpdateOne(
        {"_id": movie.get("doc_id")},
        #{"$some_update_operator": {"some_field_to_update"}}
               
        {"$set": {"lastupdated": movie.get("lastupdated")}}
    ) for movie in movies_to_migrate]

    # here's where the bulk operation is sent to MongoDB
    bulk_results = mflix.movies.bulk_write(bulk_updates)
    print(f"{bulk_results.modified_count} documents updated")

except InvalidOperation:
    print("no updates necessary")
except Exception as e:
    print(str(e))
Example #21
def fill_au_factor_pre_close(begin_date, end_date):
    """
    Fill the daily collection with:
    1. the adjustment factor au_factor, computed as au_factor = hfq_close / close
    2. pre_close = close(-1) * au_factor(-1) / au_factor
    :param begin_date: start date
    :param end_date: end date
    """
    all_codes = ts.get_stock_basics().index.tolist()  # get_all_codes()
    total = len(all_codes)
    for i, code in enumerate(all_codes):
        hfq_daily_cursor = DB_CONN['daily_hfq'].find(
            {
                'code': code,
                'date': {
                    '$lte': end_date,
                    '$gte': begin_date
                },
                'index': False
            },
            sort=[('date', ASCENDING)],
            projection={
                'date': True,
                'close': True
            })

        date_hfq_close_dict = dict([(x['date'], x['close'])
                                    for x in hfq_daily_cursor])

        daily_cursor = DB_CONN['daily'].find(
            {
                'code': code,
                'date': {
                    '$lte': end_date,
                    '$gte': begin_date
                },
                'index': False
            },
            sort=[('date', ASCENDING)],
            projection={
                'date': True,
                'close': True
            })

        last_close = -1
        last_au_factor = -1

        _tic = time.process_time()

        update_requests = []
        for daily in daily_cursor:
            date = daily['date']
            try:
                close = daily['close']

                doc = dict()

                au_factor = np.round(date_hfq_close_dict[date] / close, 2)
                doc['au_factor'] = au_factor
                if last_close != -1 and last_au_factor != -1:
                    pre_close = last_close * last_au_factor / au_factor
                    doc['pre_close'] = np.round(pre_close, 2)

                last_au_factor = au_factor
                last_close = close

                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date,
                        'index': False
                    }, {'$set': doc}))
            except:
                print('计算复权因子时发生错误,股票代码:%s,日期:%s' % (code, date), flush=True)
                # Reset to the initial values to avoid using stale data
                last_close = -1
                last_au_factor = -1

        _toc = time.process_time()
        expect_time = (_toc - _tic) * (total - i - 1)

        if len(update_requests) > 0:
            update_result = DB_CONN['daily'].bulk_write(update_requests,
                                                        ordered=False)
            print('填充复权因子和前收,进度:(%s / %s), 股票:%s,更新:%4d条, 预计还需%.2fs' %
                  (i + 1, total, code, update_result.modified_count,
                   expect_time),
                  flush=True)
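A tiny worked example of the two formulas from the docstring, with made-up prices:

# Made-up prices, not real market data
close, hfq_close = 9.80, 14.70             # today's raw close and hfq-adjusted close
last_close, last_au_factor = 10.00, 1.50   # yesterday's close and adjustment factor

au_factor = round(hfq_close / close, 2)                        # 14.70 / 9.80 = 1.50
pre_close = round(last_close * last_au_factor / au_factor, 2)  # 10.00 * 1.50 / 1.50 = 10.00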
Example #22
    def post(self, authority, dataclass):
        """
        [Batch dataset write mode]
        Uploads a complete, fully formatted set of records; duplicate records are overwritten.
        Command format: /v1/traffic_data/authority/{authority}/class/{dataclass}/standard/MOTC_traffic_v2/method/batch -X POST -d {data}
        ---
        tags:
          - MOTC Traffic v2 Upload API (for uploading data in the MOTC "Real-Time Traffic Data Standard (v2.0)" format)
        parameters:
          - in: path
            name: authority
            type: string
            required: true
            description: Authority short code (https://traffic-api-documentation.gitbook.io/traffic/xiang-dai-zhao-biao)
            enum: ['NFB', 'THB', 'TNN']
          - in: path
            name: dataclass
            type: string
            required: true
            description: Data class (per the data types defined in the Real-Time Traffic Data Standard v2.0, e.g. VD, VDLive, LiveTraffic...)
            enum: ['VD', 'VDLive', 'CCTV', 'CMS', 'CMSLive', 'AVI', 'AVIPair', 'AVIPairLive',
                   'ETag', 'ETagPair', 'ETagPairLive', 'GVPLiveTraffic', 'CVPLiveTraffic',
                   'Section', 'SectionLink', 'LiveTraffic', 'CongestionLevel', 'SectionShape',
                   'News']
          - in: body
            name: data
            required: true
            description: A set of records (JSON format)
        responses:
          200:
            description: OK
         """

        from api import mongo_client

        message = ''

        # Read the API request parameters
        args = self.parser.parse_args()

        # Convert the parameters to lowercase
        dataclass_lower = dataclass.lower()
        authority_lower = authority.lower()

        # Build the MongoDB connection settings
        database = 'traffic_data_' + authority_lower
        collection = dataclass_lower
        db = mongo_client[database]
        collection = db[collection]

        # pyspark write syntax
        # Input JSON document
        data = request.get_json()
        update_time = data['UpdateTime']
        one_records = data[dataclass_record[dataclass]]
        update_records = []
        for one_record in one_records:
            if not ('DataCollectTime' in one_record) and not ('UpdateTime'
                                                              in one_record):
                # Static data: attach the UpdateTime info
                one_record.update(
                    {'UpdateTime': aniso8601.parse_datetime(update_time)})
                update_records.append(
                    UpdateOne(
                        {
                            dataclass_id[dataclass]:
                            one_record[dataclass_id[dataclass]],
                            'UpdateTime':
                            one_record['UpdateTime']
                        }, {"$set": one_record},
                        upsert=True))
            else:
                one_record.update({
                    'DataCollectTime':
                    aniso8601.parse_datetime(one_record['DataCollectTime'])
                })
                update_records.append(
                    UpdateOne(
                        {
                            dataclass_id[dataclass]:
                            one_record[dataclass_id[dataclass]],
                            'DataCollectTime':
                            one_record['DataCollectTime']
                        }, {"$set": one_record},
                        upsert=True))
        try:
            collection.bulk_write(update_records)
        except BulkWriteError as e:
            message = e.details
            return {'message': message}, 404

        message = 'upload succeeded'

        return {'message': message}, 200
Example #23
 async def post(self):
     r_dict = {'code': 0}
     try:
         subject_dimension_id_list = self.get_body_arguments(
             'subject_dimension_id_list[]', [])
         if subject_dimension_id_list:
             operate = self.get_argument('operate', None)
             if operate is not None:
                 status = None
                 id_list = [
                     ObjectId(subject_id)
                     for subject_id in subject_dimension_id_list
                 ]
                 if int(operate) == 1:
                     status = STATUS_SUBJECT_DIMENSION_ACTIVE
                     update_requests = []
                     for dimension_id in subject_dimension_id_list:
                         update_requests.append(
                             UpdateOne({'_id': ObjectId(dimension_id)}, {
                                 '$set': {
                                     'status': status,
                                     'updated_dt': datetime.datetime.now(),
                                     'updated_id': self.current_user.oid
                                 }
                             }))
                     await SubjectDimension.update_many(update_requests)
                 elif int(operate) == 0:
                     status = STATUS_SUBJECT_DIMENSION_INACTIVE
                     update_requests = []
                     for dimension_id in subject_dimension_id_list:
                         update_requests.append(
                             UpdateOne({'_id': ObjectId(dimension_id)}, {
                                 '$set': {
                                     'status': status,
                                     'updated_dt': datetime.datetime.now(),
                                     'updated_id': self.current_user.oid
                                 }
                             }))
                     await SubjectDimension.update_many(update_requests)
                 elif int(operate) == -1:
                     cid_list = await SubjectDimension.distinct(
                         'cid', {
                             '_id': {
                                 '$in': id_list
                             },
                             'parent_cid': {
                                 '$in': [None, '']
                             }
                         })
                     await SubjectDimension.delete_many(
                         {'_id': {
                             '$in': id_list
                         }})
                     await SubjectDimension.delete_many(
                         {'parent_cid': {
                             '$in': cid_list
                         }})
                 # When the status changes, the sub-dimensions change too
                 if int(operate) in [1, 0] and status:
                     cid_list = await SubjectDimension.distinct(
                         'cid', {
                             '_id': {
                                 '$in': id_list
                             },
                             'parent_cid': {
                                 '$in': [None, '']
                             }
                         })
                     await update_subject_sub_dimension_status(
                         self.current_user.oid, status, cid_list)
                 r_dict['code'] = 1
     except RuntimeError:
         logger.error(traceback.format_exc())
     return r_dict
Example #24
    def post(self, authority, dataclass, date):
        """
        [Single static record mode]
        Writes a single static record only; duplicate records are overwritten.
        Command format: /v1/traffic_data/authority/{authority}/class/{dataclass}/update/{date}/standard/MOTC_traffic_v2/method/one_record -X POST -d {data}
        ---
        tags:
          - MOTC Traffic v2 Upload API (for uploading data in the MOTC "Real-Time Traffic Data Standard (v2.0)" format)
        parameters:
          - in: path
            name: authority
            type: string
            required: true
            description: Authority short code (https://traffic-api-documentation.gitbook.io/traffic/xiang-dai-zhao-biao)
            enum: ['NFB', 'THB', 'TNN']
          - in: path
            name: dataclass
            type: string
            required: true
            description: Data class (per the data types defined in the Real-Time Traffic Data Standard v2.0, e.g. VD, VDLive, LiveTraffic...)
            enum: ['VD', 'CCTV', 'CMS', 'AVI', 'AVIPair',
                   'ETag', 'ETagPair',
                   'Section', 'SectionLink', 'CongestionLevel', 'SectionShape']
          - in: path
            name: date
            type: string
            required: true
            description: Time the data represents (static data reference field: UpdateTime) [format: ISO8601]
            default: '2020-12-20T15:00:00+08:00'
          - in: body
            name: data
            required: true
            description: A single record (JSON format)
        responses:
          200:
            description: OK
         """

        from api import mongo_client

        message = ''

        # Read the API request parameters
        args = self.parser.parse_args()

        # Convert the parameters to lowercase
        dataclass_lower = dataclass.lower()
        authority_lower = authority.lower()

        # Build the MongoDB connection settings
        database = 'traffic_data_' + authority_lower
        collection = dataclass_lower
        db = mongo_client[database]
        collection = db[collection]

        # pyspark write syntax
        # Input JSON document
        one_record = request.get_json()
        update_time = date
        update_records = []
        one_record.update(
            {'UpdateTime': aniso8601.parse_datetime(update_time)})
        update_records.append(
            UpdateOne(
                {
                    dataclass_id[dataclass]:
                    one_record[dataclass_id[dataclass]],
                    'UpdateTime': one_record['UpdateTime']
                }, {"$set": one_record},
                upsert=True))
        try:
            collection.bulk_write(update_records)
        except BulkWriteError as e:
            message = e.details
            return {'message': message}, 404

        message = 'upload succeeded'

        return {'message': message}, 200
Example #25
def get_botscore_by_userid(user):
    """
    Collects the botscore from Botometer

    Keyword arguments:
    user -- Twitter users' identificator
    """

    user_id = user.id
    try:
        botometer_instance = random.choice(keys)
        consumer_key = botometer_instance.consumer_key
        result = botometer_instance.check_account(user_id)
        return UpdateOne({'_id': make_objid(user_id)},
                         {'$set': {
                             'scores': result,
                             'screen_name': user.screen_name,
                             'id': user.id,
                             'error': 'None',
                             'checked': datetime.now().strftime("%d/%m/%Y")
                         }},
                         upsert=True
                         )
    except Exception as e:
        # Locked account (private)
        auth_match = re.search('Not authorized', str(e))
        timeline_match = re.search('has no tweets in timeline', str(e))
        notExist_match = re.search('Sorry, that page does not exist', str(e))
        overCapacity_match = re.search('Over capacity', str(e))

        if auth_match:
            return UpdateOne({'_id': make_objid(user_id)},
                             {'$unset': {'scores': ""},
                              '$set': {'screen_name': user.screen_name,
                                       'id': user.id,
                                       'error': 'not authorized',
                                       'checked': datetime.now().strftime("%d/%m/%Y")},
                              '$push': {'error_key_used': consumer_key}},
                             upsert=True
                             )
        elif overCapacity_match:
            return UpdateOne({'_id': make_objid(user_id)},
                             {'$unset': {'scores': ""},
                              '$set': {'screen_name': user.screen_name,
                                       'id': user.id,
                                       'error': 'over capacity',
                                       'checked': datetime.now().strftime("%d/%m/%Y")},
                              '$push': {'error_key_used': consumer_key}},
                             upsert=True
                             )
        elif timeline_match:
            # print("User", user_id, " has no tweets in timeline")
            return UpdateOne({'_id': make_objid(user_id)},
                             {'$unset': {'scores': ""},
                              '$set': {'screen_name': user.screen_name,
                                       'id': user.id,
                                       'error': 'has no tweets in timeline',
                                       'checked': datetime.now().strftime("%d/%m/%Y")}},
                             upsert=True
                             )
        elif notExist_match:
            # print("User", user_id, " does not exists anymore")
            return UpdateOne({'_id': make_objid(user_id)},
                             {'$unset': {'scores': ""},
                              '$set': {'error': 'does not exists anymore',
                              'checked': datetime.now().strftime("%d/%m/%Y")}},
                             upsert=True
                             )
        else:
            print("Exception. User:", user_id, "API:", consumer_key, "Message:", e)  # redacted argument assumed to be user_id

            return UpdateOne({'_id': make_objid(user_id)},
                             {'$unset': {'scores': ""},
                              '$set': {
                                 'screen_name': user.screen_name,
                                 'id': user.id,
                                 'error': str(e),
                                 'checked': datetime.now().strftime("%d/%m/%Y")}
                              },
                             upsert=True
                             )
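Since `get_botscore_by_userid` always returns an `UpdateOne` request rather than writing it, callers are expected to gather the requests and submit them in one batch. A hedged usage sketch (the collection, client setup, and `users_to_check` iterable are placeholders):

from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")  # placeholder URI
scores = client["botometer"]["scores"]             # placeholder collection

requests = [get_botscore_by_userid(user) for user in users_to_check]  # users_to_check assumed defined
if requests:
    result = scores.bulk_write(requests, ordered=False)
    print(result.upserted_count, "upserted,", result.modified_count, "modified")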
Example #26
def compute_fractal(begin_date, end_date):
    # Get all stock codes
    codes = get_all_codes()

    # Compute the signal for each stock
    for code in codes:
        try:
            # Fetch forward-adjusted (hfq) prices; the fractal signal is computed on adjusted prices
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'high': True,
                    'low': True,
                    '_id': False
                })

            df_daily = DataFrame([daily for daily in daily_cursor])

            # Use the date as the index
            df_daily.set_index(['date'], inplace=True)

            # Use shift to align the two days before and the two days after with the middle day
            df_daily_shift_1 = df_daily.shift(1)
            df_daily_shift_2 = df_daily.shift(2)
            df_daily_shift_3 = df_daily.shift(3)
            df_daily_shift_4 = df_daily.shift(4)

            # Top fractal: the middle day's high is greater than the highs of the two preceding days and the two following days
            df_daily['up'] = (df_daily_shift_3['high'] > df_daily_shift_1['high']) & \
                             (df_daily_shift_3['high'] > df_daily_shift_2['high']) & \
                             (df_daily_shift_3['high'] > df_daily_shift_4['high']) & \
                             (df_daily_shift_3['high'] > df_daily['high'])

            # Bottom fractal: the middle day's low is lower than the lows of the two preceding days and the two following days
            df_daily['down'] = (df_daily_shift_3['low'] < df_daily_shift_1['low']) & \
                               (df_daily_shift_3['low'] < df_daily_shift_2['low']) & \
                               (df_daily_shift_3['low'] < df_daily_shift_4['low']) & \
                               (df_daily_shift_3['low'] < df_daily['low'])

            # Keep only the dates where a top or bottom fractal signal appears
            df_daily = df_daily[(df_daily['up'] | df_daily['down'])]

            # Drop the columns that are no longer needed
            df_daily.drop(columns=['high', 'low'], inplace=True)

            print(df_daily)
            # Save the signals to the database
            update_requests = []
            # Each saved record consists of the code, the date and the signal direction
            for date in df_daily.index:
                doc = {
                    'code': code,
                    'date': date,
                    # up: top fractal, down: bottom fractal
                    'direction': 'up' if df_daily.loc[date]['up'] else 'down'
                }

                # The upsert filter uses code, date and direction, so an index on these three fields is needed:
                # db.fractal_signal.createIndex({'code': 1, 'date': 1, 'direction': 1})
                update_requests.append(
                    UpdateOne(doc, {'$set': doc}, upsert=True))

            if len(update_requests) > 0:
                update_result = DB_CONN['fractal_signal'].bulk_write(
                    update_requests, ordered=False)
                print('%s, upserted: %4d, modified: %4d' %
                      (code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except Exception:
            print('Error occurred: %s' % code, flush=True)
            traceback.print_exc()
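Note: the sketch below illustrates the shift-based alignment idea from the example above on a tiny, made-up DataFrame. For compactness it uses shift(2) as the "middle" row (the example above centres on shift(3)); the prices and dates are invented.

import pandas as pd

df = pd.DataFrame(
    {"high": [10, 12, 15, 11, 9, 13, 14], "low": [8, 9, 11, 7, 6, 10, 11]},
    index=pd.date_range("2020-01-01", periods=7),
)

# Treat shift(2) as the middle day: its high must exceed the highs of the
# two days before it (shift(4), shift(3)) and the two days after it (shift(1), shift(0)).
mid = df.shift(2)
df["up"] = (
    (mid["high"] > df.shift(4)["high"])
    & (mid["high"] > df.shift(3)["high"])
    & (mid["high"] > df.shift(1)["high"])
    & (mid["high"] > df["high"])
)
print(df[df["up"]])  # rows whose middle-aligned day forms a top fractal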
Example #27
0
    def _load_institutions(
        self,
        formatted_sheet_data: FormattedSheetData,
    ):
        """
        Load institutions and their variables into the database.

        Parameters
        ----------
        formatted_sheet_data: FormattedSheetData
            The data to be loaded into the database. Please see the FormattedSheetData class to view its attributes.
        """
        institution_primary_keys = [InstitutionField.name, InstitutionField.category]
        if InstitutionField.country in formatted_sheet_data.meta_data:
            institution_primary_keys.append(InstitutionField.country)
        # Create the list of update requests into the db, one for each institution
        institution_requests = [
            UpdateOne(
                {pk: institution.get(pk) for pk in institution_primary_keys},
                {
                    "$set": {
                        key: institution.get(key)
                        for key in institution.keys()
                        if key != "childs"
                    }
                },
                upsert=True,
            )
            for institution in formatted_sheet_data.formatted_data
        ]
        # Bulk write the institutions in the db
        institution_requests_results = self._db.get_collection(
            db_collection.institutions
        ).bulk_write(institution_requests)
        log.info(
            f"Loaded {institution_requests_results.upserted_count} {db_collection.institutions} "
            f"from sheet: {formatted_sheet_data.sheet_title}"
        )
        # Get doc id for each institution
        institution_doc_id_dict = {}
        for i, institution in enumerate(formatted_sheet_data.formatted_data):
            upserted_id = institution_requests_results.upserted_ids.get(i)
            if upserted_id is None:
                # The institution wasn't upserted
                # Find the doc
                institution_doc = self._db.get_collection(
                    db_collection.institutions
                ).find_one({pk: institution.get(pk) for pk in institution_primary_keys})
                institution_doc_id_dict[i] = institution_doc.get(InstitutionField._id)
            else:
                institution_doc_id_dict[i] = upserted_id

        # Create the list of update requests into the db, one for each variable
        variable_requests = [
            UpdateOne(
                {
                    VariableField.institution: institution_doc_id_dict.get(i),
                    VariableField.heading: child.get(VariableField.heading),
                    VariableField.name: child.get(VariableField.name),
                    VariableField.variable_index: child.get(
                        VariableField.variable_index
                    ),
                },
                {
                    "$set": {
                        VariableField.institution: institution_doc_id_dict.get(i),
                        **child,
                    }
                },
                upsert=True,
            )
            for i, institution in enumerate(formatted_sheet_data.formatted_data)
            for child in institution.get("childs")
        ]
        # Bulk write the variables in the db
        variable_requests_results = self._db.get_collection(
            db_collection.variables
        ).bulk_write(variable_requests)
        log.info(
            f"Loaded {variable_requests_results.upserted_count} {db_collection.variables} "
            f"from sheet: {formatted_sheet_data.sheet_title}"
        )
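Note: the pattern of resolving document ids after a bulk upsert (upserted_ids for inserted documents, find_one for matched ones) is shown in isolation in the hedged sketch below; the collection name, keys and sample rows are hypothetical.

from pymongo import MongoClient, UpdateOne

coll = MongoClient()["test_db"]["institutions"]  # hypothetical names
rows = [{"name": "Central Bank", "category": "finance"},
        {"name": "Supreme Court", "category": "judiciary"}]

requests = [UpdateOne({"name": r["name"], "category": r["category"]},
                      {"$set": r}, upsert=True) for r in rows]
result = coll.bulk_write(requests)

doc_ids = {}
for i, r in enumerate(rows):
    upserted_id = result.upserted_ids.get(i)  # keyed by request index
    if upserted_id is None:
        # The request matched an existing document, so look up its _id directly.
        doc_ids[i] = coll.find_one({"name": r["name"], "category": r["category"]})["_id"]
    else:
        doc_ids[i] = upserted_id
print(doc_ids)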
Example #28
0
    async def process_cursor(self, skip=0):
        """Process mongodb cursor
        Transform data and move to new database

        Args:
            docs(:obj:`pymongo.Cursor`): documents to be processed
        """
        bulk_write = []
        query = {}
        if self.max_entries == float('inf'):
            limit = 0
        else:
            limit = self.max_entries
        docs = self.from_collection.find(filter=query, projection={'_id': 0},
                                        no_cursor_timeout=True, batch_size=500,
                                        skip=skip, limit=limit)
        i = 0
        async for doc in docs:
            i += 1
            if i == self.max_entries:
                break
            if i != 0 and i % 50 == 0:
                print("Processing file {}".format(i + skip))
                try:
                    await self.to_collection.bulk_write(bulk_write)
                    bulk_write = []
                except BulkWriteError as bwe:
                    pprint(bwe.details)
                    bulk_write = []
            uniprot_id = doc.get('uniprot_id')
            doc["add_id"] = [{"name_space": "gene_name_alt", "value": doc.get("gene_name_alt")},
                             {"name_space": "gene_name_orf", "value": doc.get("gene_name_orf")},
                             {"name_space": "gene_name_oln", "value": doc.get("gene_name_oln")}]
            doc.pop('gene_name_alt', None)
            doc.pop('gene_name_orf', None)
            doc.pop('gene_name_oln', None)
            doc['schema_version'] = "2"
            tax_doc = await motor_client_manager.client.get_database(
                "datanator-test")["taxon_tree"].find_one(filter={"tax_id": doc["ncbi_taxonomy_id"]},
                projection={'canon_anc_ids': 1, 'canon_anc_names': 1})
            if tax_doc is not None:
                doc['canon_anc_names'] = tax_doc["canon_anc_names"] 
                doc['canon_anc_ids'] = tax_doc["canon_anc_ids"] 
            modifications = doc.get('modifications')
            if modifications is not None:
                bw = []
                for mod in modifications:
                    mod['uniprot_id'] = uniprot_id
                    mod['schema_version'] = "2"
                    reference = mod['reference']
                    mod['reference'] = {"namespace": "doi", "value": reference["doi"]}
                    bw.append(json.loads(json.dumps(mod, ignore_nan=True)))  
                await motor_client_manager.client.get_database(self.to_database)['protein_modifications'].insert_many(bw)
            doc.pop('modifications', None)
            bulk_write.append(UpdateOne({'uniprot_id': uniprot_id}, {'$set': json.loads(json.dumps(doc, ignore_nan=True))}, upsert=True))
        if len(bulk_write) != 0:
            try:
                await self.to_collection.bulk_write(bulk_write)
            except BulkWriteError as bwe:
                pprint(bwe.details)
            finally:
                print("Done.")   
Example #29
0
def run():
    calendar_list = []
    calendar_file_path = os.path.join(sys.path[0], 'calendar.dat')
    try:
        mongo = Mongo()
        # Trading calendar
        exchange_calendar = mongo.gettable('z3_exchange_calendar')
        tmp_calendar_list = exchange_calendar.find({'open_close': 2})
        for calendar in tmp_calendar_list:
            calendar_list.append(calendar['trade_date'])
    except ServerSelectionTimeoutError:
        if not os.path.exists(calendar_file_path):
            return
        with open(calendar_file_path) as rfile:
            for calendar in rfile.readlines():
                calendar_list.append(calendar)


    calendar_dict = {}
    with open(calendar_file_path, 'w') as wfile:
        for calendar in calendar_list:
            wfile.write(str(calendar) + '\n')
            calendar_dict[int(calendar)] = 1
    calendar_date = int(time.strftime('%Y%m%d', time.localtime(time.time())))
    if calendar_date in calendar_dict:
        return

    for key, value in TABLES.items():
        if not value:
            continue
        logger.info('===init init===table = %s', key)
        collection = mongo.gettable(key)
        if collection.name == 'Z3_TOPIC_CHANGE':
            result = collection.find_one()
            value['_id'] = int(datetime.datetime.now().strftime("%Y%m%d"))
            value['topic_num'] = result.get('topic_num')
            Mongo.remove(collection)
            Mongo.insert(collection, value)
            logger.info('===update update===table = %s', key)
        elif collection.name == 'Z3_INDU_CHANGE':
            result = collection.find_one()
            value['_id'] = int(datetime.datetime.now().strftime("%Y%m%d"))
            value['indu_num'] = result.get('indu_num')
            Mongo.remove(collection)
            Mongo.insert(collection, value)
        else:
            if value.get('operator'):
                Mongo.remove(collection)
                logger.info('===remove remove===table = %s', key)
            else:
                Mongo.update(collection, value)
                logger.info('===update update===table = %s', key)
    # add by 20170731
    equity_profile_collection = mongo.gettable('z3_equity_profile')
    equity_profile_list = equity_profile_collection.find({'sec_type': 1, 'chi_spel': {'$ne': None}}, {'_id': 1, 'chi_spel': 1})
    requests = []
    for equity_profile in equity_profile_list:
        innercode = equity_profile.get('_id')
        chi_spel = equity_profile.get('chi_spel')
        requests.append(UpdateOne({'_id': innercode, 'type': 1}, {'$set': {'chi_spel': chi_spel}}))
    cap_info_subs_collection = mongo.gettable('z3_cap_info_subs')
    cap_info_subs_collection.bulk_write(requests, ordered=False)
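Note: the final step above (copying one field from a profile collection into a subscription collection in bulk, without upsert) is shown in isolation below; the database and collection names are hypothetical.

from pymongo import MongoClient, UpdateOne

db = MongoClient()["test_db"]  # hypothetical database name
profiles = db["equity_profile"]
subs = db["cap_info_subs"]

requests = [
    UpdateOne({"_id": p["_id"], "type": 1},
              {"$set": {"chi_spel": p["chi_spel"]}})  # no upsert: only touch existing documents
    for p in profiles.find({"chi_spel": {"$ne": None}}, {"_id": 1, "chi_spel": 1})
]
if requests:
    result = subs.bulk_write(requests, ordered=False)
    print(result.modified_count)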
Example #30
0
File: rb.py  Project: MarsStirner/nvesta
    def fixate(self, new_version):
        """
        Fixate the pending changes as a new version
        @return:
        """
        if new_version == self.meta.version:
            raise CannotFixate('New version equals current version')
        if new_version in [v.version for v in self.meta.versions]:
            raise CannotFixate(
                'New version already present in previous versions')

        from pymongo import InsertOne, UpdateOne, DeleteMany

        old_version = self.meta.version or new_version

        # noinspection PyListCreation
        requests = []

        # If the reference book used to be unversioned and there are records without meta, fill the meta in with defaults.
        self._fix_meta(new_version)

        # Newly created (draft) records marked as deleted - just remove them
        requests.append(
            DeleteMany(
                {'$and': [{
                    '_meta.delete': True
                }, {
                    '_meta.draft': True
                }]}))

        # Previously fixed records marked as deleted - stamp them with the closing version
        for db_record in self.collection.find(
            {'$and': [{
                '_meta.delete': True
            }, {
                '_meta.draft': False
            }]}):
            rb_record = self.record_factory(db_record)
            rb_record.meta.delete = False
            rb_record.meta.draft = False
            rb_record.meta.edit = None
            rb_record.meta.end_version = old_version
            requests.append(
                UpdateOne(
                    {'_id': rb_record._id},
                    {'$set': rb_record.as_db_record()},
                ))

        # New (draft) records - stamp them with the starting version
        for db_record in self.collection.find(
            {'$and': [
                {
                    '_meta.delete': False
                },
                {
                    '_meta.draft': True
                },
            ]}):
            rb_record = self.record_factory(db_record)
            rb_record.meta.draft = False
            rb_record.meta.delete = False
            rb_record.meta.end_version = None
            rb_record.meta.beg_version = new_version
            requests.append(
                UpdateOne(
                    {'_id': rb_record._id},
                    {'$set': rb_record.as_db_record()},
                ))

        # Edited records - insert the new revision and stamp the old one with the closing version
        for db_record in self.collection.find({'_meta.edit': {'$ne': None}}):
            rb_record = self.record_factory(db_record)

            new = self.record_factory(rb_record.meta.edit)
            new.meta.beg_version = new_version
            new.meta.end_version = None
            new.meta.delete = False
            new.meta.draft = False
            requests.append(InsertOne(new.as_db_record()))

            rb_record.meta.end_version = old_version
            rb_record.meta.edit = None
            rb_record.meta.delete = False
            rb_record.meta.draft = False
            requests.append(
                UpdateOne(
                    {'_id': rb_record._id},
                    {'$set': rb_record.as_db_record()},
                ))

        self.collection.bulk_write(requests)
        self.meta.version = new_version

        from nvesta.library.rb.rbmeta import RefBookVersionMeta

        version_meta = RefBookVersionMeta()
        version_meta.version = new_version
        version_meta.fix_datetime = datetime.datetime.utcnow()
        self.meta.versions.append(version_meta)
        self.meta.reshape()
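Note: fixate() mixes DeleteMany, UpdateOne and InsertOne requests in a single ordered bulk_write. A minimal sketch of that combination, with made-up collection and field values, is given below.

from pymongo import MongoClient, InsertOne, UpdateOne, DeleteMany

coll = MongoClient()["test_db"]["refbook"]  # hypothetical names

requests = [
    # Drop records that were both created and deleted while still drafts.
    DeleteMany({"_meta.delete": True, "_meta.draft": True}),
    # Close an existing record at the old version.
    UpdateOne({"_id": 1}, {"$set": {"_meta.end_version": "1.0", "_meta.draft": False}}),
    # Insert its replacement, starting at the new version.
    InsertOne({"code": "A01", "_meta": {"beg_version": "2.0", "end_version": None,
                                        "draft": False, "delete": False}}),
]
coll.bulk_write(requests)  # ordered=True by default, so the requests run in this order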