def query(self, codes: list, fields: list, startdate: datetime, enddate: datetime, force_update=False, update_only=False, skip_update=False) -> defaultdict:
    """Fetch stored data for ``codes`` x ``fields`` between two dates.

    The call runs in up to three phases:

    1. ``force_update`` -- ``self.remove`` is called first for the requested
       codes, date range and fields (the original note describes this as
       removing targeted data that is deemed outdated).
    2. unless ``skip_update`` -- ``self._auto_update`` is called for the same
       arguments.
    3. unless ``update_only`` -- one sub-collection per field is read and
       turned into a DataFrame.

    Both date bounds are inclusive (``$gte`` / ``$lte``).  ``codes`` and
    ``fields`` are used as given; no str-to-list conversion happens here.

    Returns:
        A ``defaultdict(DataFrame)`` keyed by the name-compliant field.  It
        is empty when ``update_only`` is true.  Column names have every
        ``'~'`` replaced by ``'.'``.
    """
    if force_update:
        self.remove(codes, startdate, enddate, fields)
    if not skip_update:
        self._auto_update(codes, startdate, enddate, fields)

    result: defaultdict = defaultdict(DataFrame)
    if update_only:
        return result

    safe_codes = mongodb_name_compliance(codes)
    safe_fields = mongodb_name_compliance(fields)
    for name in safe_fields:
        collection = self.col[name]
        date_filter = {self.date_name: {'$gte': startdate, '$lte': enddate}}
        # Project only the requested code columns plus the date key.
        cursor = collection.find(date_filter, safe_codes + [self.date_name])
        frame = del_id(DataFrame(cursor))
        # Undo the '.' -> '~' substitution used for stored column names.
        frame.columns = [col.replace('~', '.') for col in frame.columns]
        result[name] = frame
    return result
def solve_remove_params(self, codes: list, fields: list, start: datetime, end: datetime):
    """Yield the (code, field) pairs whose recorded ranges overlap the target window.

    The window is ``[start, end + 1 day]``.  For every code/field pair the
    recorded ``Bubbles`` are read from ``self.status`` (under name-compliant
    keys) and intersected with the window; pairs with an empty intersection
    are skipped.

    Yields:
        ``(code, field, bubbles, overlap)`` -- the names as passed in by the
        caller, the recorded bubbles, and their intersection with the window.
    """
    window = [start, end + timedelta(1)]
    recorded = self.status[codes, fields]
    for code in codes:
        for field in fields:
            key = (mongodb_name_compliance(code), mongodb_name_compliance(field))
            bubbles = recorded[key]
            overlap: Bubbles = bubbles.intersect(window)
            if overlap.isempty:
                continue
            yield code, field, bubbles, overlap
def solve_update_params(self, codes: list, fields: list, start: datetime, end: datetime):
    """Yield the (code, field) pairs that still have missing data to download.

    The window is ``[start, end + 1 day]``.  For every code/field pair the
    recorded ``Bubbles`` are read from ``self.status`` (under name-compliant
    keys) and ``bubbles.gaps(window)`` is computed; pairs whose result is
    empty are skipped.

    Yields:
        ``(code, field, bubbles, missing)`` -- the names as passed in by the
        caller, the recorded bubbles, and the gaps found inside the window.
    """
    window = [start, end + timedelta(1)]
    recorded = self.status[codes, fields]
    for code in codes:
        for field in fields:
            key = (mongodb_name_compliance(code), mongodb_name_compliance(field))
            bubbles = recorded[key]
            missing: Bubbles = bubbles.gaps(window)
            if missing.isempty:
                continue
            yield code, field, bubbles, missing
def __getitem__(self, key):
    """Look up recorded ``Bubbles`` by a ``(codes, fields)`` key.

    Args:
        key: A 2-tuple ``(codes, fields)``.  Each element is either a single
            ``str`` or a list of names.

    Returns:
        * When both elements are ``str``: the ``Bubbles`` stored for that one
          code/field, or an empty ``Bubbles()`` when no document matches the
          code or the document has no such field.
        * Otherwise: a ``defaultdict(Bubbles)`` keyed by
          ``(code, field)`` using the name-compliant names found in the
          returned documents.
    """
    codes, fields = key
    # Decide the return shape from the caller's original types, before the
    # names are wrapped into lists below.
    single = isinstance(codes, str) and isinstance(fields, str)
    codes = mongodb_name_compliance([codes] if isinstance(codes, str) else codes)
    fields = mongodb_name_compliance([fields] if isinstance(fields, str) else fields)

    if single:
        doc = self.col.find_one({'code': codes[0]}, fields)
        try:
            # Index with the field *name*.  Indexing with the list itself
            # always raised TypeError (unhashable list), which was swallowed
            # below and made every single-key lookup come back empty.
            return Bubbles(doc[fields[0]])
        except (TypeError, KeyError):
            # doc is None (no document for this code) or the field is absent.
            return Bubbles()

    res = defaultdict(Bubbles)
    for doc in self.col.find({'code': {'$in': codes}}, fields + ['code']):
        # Every key except the bookkeeping ones is a field name.
        for f in set(doc.keys()) - {'code', '_id'}:
            res[doc['code'], f] = Bubbles(doc[f])
    return res
def write_batch_to_db(batches):
    """Upsert every DataFrame in ``batches`` into its field sub-collection.

    ``batches`` maps a field name to a DataFrame.  ``self`` is not a
    parameter; it is taken from the enclosing scope.  Each frame's columns
    are replaced in place by their name-compliant form before writing.
    """

    def convert_2_bulks(df):
        """Build one upsert per row of ``df`` that contains no missing values.

        Each operation matches on the date key (the row label converted with
        ``to_pydatetime()``) and ``$set``s the row's values.
        """
        return [
            UpdateOne(
                {self.date_name: stamp.to_pydatetime()},
                {'$set': row.to_dict()},
                upsert=True,
            )
            for stamp, row in df.dropna().iterrows()
        ]

    for field, df in batches.items():
        df.columns = mongodb_name_compliance(df.columns)
        operations = convert_2_bulks(df)
        target = self.col[field.upper().replace('.', '~')]
        if not operations:
            # Nothing to write for this field.
            continue
        target.bulk_write(operations, ordered=False)
def create_index():
    """Create a unique index on the date key in every field sub-collection.

    ``fields`` and ``self`` are not parameters; both are taken from the
    enclosing scope.  Field names are made name-compliant before being used
    to select the sub-collection.
    """
    for name in mongodb_name_compliance(fields):
        self.col[name].create_index(self.date_name, unique=True)