def get_company_settings():
    """Return the settings of every active company, keyed by document id.

    Issues a N1QL query against ``ce`` for rows whose ``type`` equals 3
    (bound as positional parameter ``$1``) using the ``cb`` connection, and
    keeps only rows whose ``ce`` payload has ``companyState == 'ACTIVE'``.

    Returns:
        dict: maps each kept row's ``id`` to that row's ``ce`` payload.
    """
    statement = 'SELECT META().id,* FROM ce WHERE type=$1'
    rows = N1QLRequest(_N1QLQuery(statement, 3), cb)
    # Rows whose state is missing or anything other than 'ACTIVE' are dropped.
    return {
        row['id']: row['ce']
        for row in rows
        if row['ce'].get('companyState') == 'ACTIVE'
    }
def read_company_settings(limit=10, offset=0):
    """Return the document ids of all rows in ``ce`` whose ``type`` is 3.

    The query runs through the ``cb_read`` connection with the query
    ``timeout`` attribute set to 8000. Unlike ``companyState`` filtering
    done elsewhere, no state filter is applied here: every returned row's
    ``id`` is collected.

    Args:
        limit: Accepted for interface compatibility; currently NOT applied.
            A paginated ``LIMIT $2 OFFSET $3`` variant of the query used to
            exist but is disabled, so the whole result set is read.
        offset: Accepted for interface compatibility; currently NOT applied
            (see ``limit``).

    Returns:
        list: the ``id`` value of each row, in the order rows are yielded.
    """
    query = _N1QLQuery('SELECT META().id,* FROM ce WHERE type=$1', 3)
    query.timeout = 8000
    return [row['id'] for row in N1QLRequest(query, cb_read)]
def to_n1ql_query(self, statement, *options, **kwargs):
    """Build an ``_N1QLQuery`` for *statement* from these options.

    The stored options (``self``) are combined with the per-call ``kwargs``;
    ``kwargs`` win on conflict. Neither ``self`` nor any container stored in
    it is modified.

    Args:
        statement: The N1QL statement text.
        *options: Positional query parameters. When any are given they
            replace the stored ``positional_parameters`` entirely.
        **kwargs: Per-call overrides. Keys found in ``self.VALID_OPTS`` are
            treated as query options; every other key is passed to the
            query as a named parameter, overriding an entry of the same
            name in the stored ``named_parameters``.

    Returns:
        The configured ``_N1QLQuery`` instance.
    """
    # Work on a copy so per-call overrides never touch the stored options.
    # NOTE(review): assumes ``self.copy()`` is a shallow, dict-like copy.
    args = self.copy()
    args.update(kwargs)

    # Positional arguments passed to this call OVERRIDE the stored list.
    positional_parameters = args.pop('positional_parameters', None) or []
    if options:
        positional_parameters = options

    # Copy the stored named parameters before adding to them: ``args`` is
    # only a shallow copy, so the popped dict may be the very object held by
    # ``self``, and mutating it would leak this call's kwargs into later
    # calls. Popping first also guarantees the dict is never inserted into
    # itself by the extra-key scan below.
    named_parameters = dict(args.pop('named_parameters', None) or {})
    for key in args:
        if key not in self.VALID_OPTS:
            named_parameters[key] = args[key]

    query = _N1QLQuery(statement, *positional_parameters, **named_parameters)

    # Metrics default to off unless explicitly requested.
    query.metrics = args.get('metrics', False)

    # option name -> (query attribute, conversion applied to the value)
    def unchanged(value):
        return value

    appliers = {
        'scan_consistency': ('consistency', lambda value: value.value),
        'consistent_with': ('consistent_with', unchanged),
        'adhoc': ('adhoc', unchanged),
        'timeout': ('timeout', lambda value: value.total_seconds()),
        'scan_cap': ('scan_cap', unchanged),
        'pipeline_batch': ('pipeline_batch', unchanged),
        'pipeline_cap': ('pipeline_cap', unchanged),
        'read_only': ('readonly', unchanged),
        'profile': ('profile', lambda value: value.value),
    }
    # For these options an explicit ``False`` is a real setting, not
    # "unset"; a plain truthiness test would silently discard it
    # (e.g. ``adhoc=False``).
    boolean_opts = ('adhoc', 'read_only')

    # Iterate VALID_OPTS (not ``appliers``) so attributes are assigned in
    # the same order as before.
    for opt in self.VALID_OPTS:
        applier = appliers.get(opt)
        if applier is None:
            continue
        value = args.get(opt, None)
        explicit_false = value is False and opt in boolean_opts
        if not (value or explicit_false):
            # Missing, None, or an empty/zero value: leave the query default.
            continue
        attribute, convert = applier
        setattr(query, attribute, convert(value))
    return query
def read_media_by_company(comp_id, offset=0):
    """Export one company's media documents from the database to CSV files.

    Rows are fetched in pages of 100 with a N1QL query filtered on
    ``companyId`` and ``type in mediaTypes``, ordered by id. Each row is
    JSON-encoded and written as a single-column CSV line:

    * rows whose ``id`` converts to ``int`` are appended to
      ``downloaded_<comp_id>.csv`` in the ``downloaded`` directory;
    * all other rows are written to ``string_ids_<comp_id>.csv`` in the
      ``string_ids`` directory (this file is truncated on every call).

    If the ``downloaded`` file already exists, the starting offset is taken
    from ``get_offset(file)`` instead of the ``offset`` argument.

    On any exception while fetching or writing a page, ``[comp_id, offset]``
    is recorded through ``failed_db_writer``, the error is printed, and the
    function returns early. Both output files are closed on every exit path.

    Args:
        comp_id: Company id bound to ``$1`` in the query.
        offset: Row offset to start from when no previous download exists.

    Returns:
        None.
    """
    print(f'Reading company data for company - {comp_id}')
    download_path = os.path.join(
        get_dir('downloaded', sub_dir), f'downloaded_{comp_id}.csv')
    if os.path.exists(download_path):
        # Resume from wherever the previous run stopped.
        offset = get_offset(download_path)
    string_ids_dir = get_dir('string_ids', sub_dir)
    limit = 100
    cb.n1ql_timeout = 8000

    # Both files are managed by ``with`` so the string-id file is closed
    # even when the error path below returns early.
    with open(f'{string_ids_dir}/string_ids_{comp_id}.csv', 'w') as str_ids, \
            open(download_path, 'a') as downloaded:
        str_writer = csv.writer(str_ids)
        csv_writer = csv.writer(downloaded)
        while True:
            media_records = []
            rows_in_page = 0
            try:
                query = _N1QLQuery(
                    'SELECT META().id, * FROM ce WHERE companyId=$1 and type in $2 order by id LIMIT $3 OFFSET $4',
                    comp_id, mediaTypes, limit, offset)
                query.timeout = 8000
                for media_obj in N1QLRequest(query, cb):
                    rows_in_page += 1
                    try:
                        # Only the convertibility matters, not the value.
                        int(media_obj['id'])
                    except (KeyError, TypeError, ValueError, OverflowError):
                        # Non-numeric id: divert to the string-id file.
                        str_writer.writerow([json.dumps(media_obj)])
                        continue
                    media_records.append([json.dumps(media_obj)])
                # Stop only when the query itself returned nothing. A page
                # made up entirely of string-id rows must not end the
                # export, because later pages may still hold numeric ids.
                if rows_in_page == 0:
                    break
                print(
                    f'Successfully read medias from db - {comp_id} - offset - {offset} - limit - {limit}'
                )
                csv_writer.writerows(media_records)
                offset += limit
            except Exception as exc:
                # Best-effort export: record where it failed so the page
                # can be retried, report the cause, and give up on this
                # company.
                failed_db_writer.writerow([comp_id, offset])
                print(
                    f"Exception reading medias from db - {comp_id} - offset - {offset} - error - {exc!r}"
                )
                return
    print(f'Finished Reading company data for company - {comp_id}')
def read_company_settings_from_db():
    """Dump the settings of all active companies to a CSV file.

    Queries ``ce`` for rows whose ``type`` is 3 via the ``cb`` connection,
    keeps those whose ``ce`` payload has ``companyState == 'ACTIVE'``, and
    writes one ``[id, JSON-encoded payload]`` row per company to
    ``company_settings_<sub_dir>.csv`` in the current working directory
    (the file is overwritten). Progress messages are printed before and
    after.

    Returns:
        None.
    """
    print('Reading company settings from db...')
    request = N1QLRequest(
        _N1QLQuery('SELECT META().id,* FROM ce WHERE type=$1', 3), cb)
    # Collect everything first; the output file is only opened once the
    # query has been fully consumed.
    active = {
        row['id']: row['ce']
        for row in request
        if row['ce'].get('companyState') == 'ACTIVE'
    }
    out_name = f'company_settings_{sub_dir}.csv'
    with open(out_name, 'w') as out:
        csv.writer(out).writerows(
            [company_id, json.dumps(settings)]
            for company_id, settings in active.items()
        )
    print('Successfully read company settings')
def get_media_count_by_company(company_settings, comp_type):
    """Write per-company media counts to ``<comp_type>_companies_cnt.csv``.

    The companies are obtained from
    ``get_companies_by_type(company_settings, comp_type)``. For each one a
    ``count(*)`` N1QL query (filtered on ``companyId`` and
    ``type in mediaTypes``) is run through ``cb`` and the ``cnt`` field of
    the first result row is recorded (0 when that field is absent). The
    output file is overwritten with ``[company, count]`` rows sorted by
    ascending count.

    Args:
        company_settings: Passed through to ``get_companies_by_type``.
        comp_type: Passed through to ``get_companies_by_type`` and used as
            the output file name prefix.

    Returns:
        None.

    Raises:
        IndexError: if a count query yields no rows.
    """
    statement = 'SELECT count(*) as cnt FROM ce WHERE companyId=$1 and type in $2'
    counts = {}
    for company_id in get_companies_by_type(company_settings, comp_type):
        request = N1QLRequest(
            _N1QLQuery(statement, company_id, mediaTypes), cb)
        # Drain the whole result before taking the first row.
        first_row = list(request)[0]
        counts[company_id] = first_row.get('cnt', 0)
    with open(f'{comp_type}_companies_cnt.csv', 'w') as out:
        # Stable sort: companies with equal counts keep their query order.
        ordered = sorted(counts.items(), key=lambda pair: pair[1])
        csv.writer(out).writerows(ordered)