def combinedFacilityList(data):
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    result = ModelFacility.getActiveFacility()
    for state_code in data:
        for facility_code in data[state_code]:
            state_facility_exist = list(
                filter(
                    lambda x: x['state_name'] == state_code and x['facility_code'] == facility_code,
                    result))
            # Logger.v('state_facility_exist', state_facility_exist, state_code, facility_code, data[state_code][facility_code]['name']);
            if not state_facility_exist:
                result.append({
                    'state_code': state_code,
                    'state_name': state_code,
                    'ptj_code': '',
                    'ptj_name': '',
                    'facility_code': facility_code,
                    'facility_name': data[state_code][facility_code]['name'],
                    'facility_type': '',
                    'active': 'a'
                })
    return result
def getMissingDates(data):
    dbManager = SharedMemoryManager.getInstance();
    db = dbManager.query();
    missing_dates = {};
    today = DateTime.now(tzinfo=msia_tz); # date only
    state_by = 'state_code';
    states = list(db['state'].find({}, {'_id': 0, state_by: 1}));
    current_year = DateTime.getDateCategoryName(date=DateTime.now(tzinfo=msia_tz), element='year');
    for rk in data:
        row = data[rk];
        if rk not in missing_dates:
            missing_dates[rk] = [];
        dates = groupDates(params={'states': states, 'state_by': state_by}, data=row);
        for date in dates['missing']:
            end_date_of_month = DateTime.getDaysAgo(days_to_crawl=1, datefrom=DateTime.getNextMonth(DateTime.convertDateTimeFromString(date)));
            day_diff = DateTime.getDifferenceBetweenDuration([today, end_date_of_month]);
            if day_diff >= 0:
                date_str = DateTime.toString(today);
            else:
                date_str = DateTime.toString(end_date_of_month);
            if date_str not in dates['crawled']:
                missing_dates[rk].append(date_str);
            # Logger.v('day_diff', day_diff);
            # Logger.v('date', DateTime.getDaysAgo(days_to_crawl=1, datefrom=DateTime.getNextMonth(DateTime.convertDateTimeFromString(ed))));
        missing_dates[rk] = sorted(list(set(missing_dates[rk])), reverse=True);
    return missing_dates;
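# Illustrative sketch only (not used by the module): the date-clamping rule in
# getMissingDates above appears to reduce to "crawl a missing month up to its last
# day, but never past today". The DateTime helpers (getNextMonth, getDaysAgo,
# getDifferenceBetweenDuration) are assumed to wrap calls equivalent to these.
from datetime import date, timedelta

def _example_clamp_crawl_date(month_start: date, today: date) -> date:
    # First day of the following month, then step back one day to get the month end.
    next_month = (month_start.replace(day=1) + timedelta(days=32)).replace(day=1)
    end_of_month = next_month - timedelta(days=1)
    # Use today while the month is still in progress, otherwise the month end.
    return min(end_of_month, today)

# _example_clamp_crawl_date(date(2020, 3, 1), date(2020, 3, 15)) -> date(2020, 3, 15)
# _example_clamp_crawl_date(date(2020, 1, 1), date(2020, 3, 15)) -> date(2020, 1, 31)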
def update(data):
    global msia_tz, column_keymap, collection_name
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    state_facility_code = '_'.join([str(data['state']), str(data['facility_code'])])
    if state_facility_code not in list(set(unique_facility)):
        state_name = fn.getNestedElement(data, 'state')
        state_code = fn.getNestedElement(data, 'state')
        facility_name = fn.getNestedElement(data, 'facility_name')
        facility_code = fn.getNestedElement(data, 'facility_code')
        date = fn.getNestedElement(data, 'date')
        date_string = DateTime.toString(date)
        values = {
            'state_name': state_name,
            'state_code': state_code,
            'facility_name': facility_name,
            'facility_code': facility_code,
            'state_updated_at': date_string,
            'facility_updated_at': date_string,
            'date': date_string,
        }
        dbManager.addBulkInsert(collection_name, values, batch=True)
        unique_facility.append(state_facility_code)
        dbManager.executeBulkOperations(collection_name)
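# Note: the dedup key above is simply the state and facility code joined with an
# underscore, e.g. (hypothetical values) state='johor', facility_code='J0001'
# gives 'johor_J0001'; each key is written to the datalog at most once per run.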
def save(params, chunk, chunks_info):
    global collection_name, column_keymap;
    upload_date = fn.getNestedElement(params, 'date');
    data = File.readChunkData(chunk);
    dbManager = SharedMemoryManager.getInstance();
    db = dbManager.query();
    current_index = fn.getNestedElement(chunks_info, 'current', 0);
    total_index = fn.getNestedElement(chunks_info, 'total', len(data));
    total_length = len(data);
    queue_info = chunks_info['queue']
    # Logger.v('Running Index:', chunks_info['queue']['running']);
    chunks_info['queue']['current'] += 1;
    # Logger.v('Saving from... {0}/{1}, current package: {2}'.format(current_index, total_index, total_length) );
    fn.printProgressBar(queue_info['current'], queue_info['total'], 'Processing Chunk Insertion');
    for idx in range(0, total_length):
        row = data[idx];
        # Logger.v('row', row);
        obj_ = transformToLowercase(row);
        date_only = obj_['approved_date'].split(' ')[0];
        # Logger.v('date_only', date_only);
        obj_.update({
            'approved_year_month': DateTime.getDateCategoryName(date=date_only, element='year_month_digit'),
            'upload_date': upload_date,
        });
        dbManager.addBulkInsert(collection_name, obj_, batch=True);
        ModelSIIntegrity.update(data=obj_);
        retrieveIssueOption(obj_);
    # ensure all data is saved properly
    dbManager.executeBulkOperations(collection_name);
    return chunks_info;
def upload(params):
    Debug = DebugManager.DebugManager();
    Debug.start();
    Debug.trace('start');
    dbManager = SharedMemoryManager.getInstance();
    db = dbManager.query();
    date = fn.getNestedElement(params, 'date');
    path = fn.getNestedElement(params, 'path');
    # url = fn.getNestedElement(params, 'callback_url'); # required params to handle callback_url
    paths, should_reset = ModelUpload.getPath(params);
    for idx in range(0, len(paths)):
        p = paths[idx];
        processed_filename = File.converExcelFileToCsv(p, ignore_index=True);
        Logger.v('processed_filename', processed_filename);
        Debug.trace('convert to json : path {0}'.format(processed_filename));
        if idx == 0 and should_reset: # reset once at the beginning
            Logger.v('Reset Database.');
            reset(date); # reset stock_issue collection
            ModelSIIntegrity.reset(date); # reset stock_issue_datalog by date given
        File.readCsvFileInChunks(processed_filename, save, params, chunksize=chunksize);
        Debug.trace('uploaded to mongo.');
    generateIndex();
    ModelSIIntegrity.generateIndex();
    Debug.trace('indexing mongo collection.');
    saveIssueOption();
    Debug.trace('save option to json.');
    trigger_params = copy.deepcopy(params);
    trigger_params['result'] = 'data count: {0}'.format(params['data_count'][path]);
    # Logger.v('trigger_params', trigger_params);
    dbManager.executeBulkOperations(None); # insert all the remaining jobs at once
    ReportStock.triggerOnComplete(trigger_params);
    Debug.trace('trigger api on complete.');
    Debug.end();
    Debug.show('Stock.upload');
def getBackdateList(params):
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    dates = []
    for idx in range(0, date_retrieve_limit + 1): # 7 days backward + 1 today
        if idx == 0:
            collection_name = 'stock_latest'
        else:
            collection_name = 'stock_{0}'.format(idx)
        # Logger.v('collection_name', collection_name);
        data = list(db[collection_name].find({}, {'_id': 0, 'date': 1}).limit(1))
        if data:
            date = DateTime.toString(data[0]['date'])
            # Logger.v('date', date);
            dates.append(date)
        # Logger.v('data', data);
    # Logger.v('dates', sorted(list(set(dates)), reverse=True));
    result = {
        'date': sorted(list(set(dates)), reverse=True)
    }
    return result
def save(params, chunk, chunks_info):
    global latest_collection_name, history_collection_name
    data = File.readChunkData(chunk)
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    current_index = fn.getNestedElement(chunks_info, 'current', 0)
    total_index = fn.getNestedElement(chunks_info, 'total', len(data))
    date = fn.getNestedElement(params, 'date')
    datetime = DateTime.convertDateTimeFromString(date)
    total_length = len(data)
    queue_info = chunks_info['queue']
    # Logger.v('Running Index:', chunks_info['queue']['running']);
    chunks_info['queue']['current'] += 1
    # Logger.v('Saving from... {0}/{1}, current package: {2}'.format(current_index, total_index, total_length) );
    fn.printProgressBar(queue_info['current'], queue_info['total'], 'Processing Chunk Insertion')
    for idx in range(0, total_length):
        # insert stock_latest
        row = data[idx]
        obj_ = transformToLowercase(data=row, datetime=datetime)
        ModelStockIntegrity.update(data=obj_)
        dbManager.addBulkInsert(latest_collection_name, obj_, batch=True)
        # dbManager.addBulkInsert(history_collection_name, obj_, batch=True); # temporary off (need 7 day data only)
        # insert items
        # d = data[idx];
        ModelItem.saveItem(row)
        # fn.printProgressBar(current_index+idx, total_index, 'Processing Item Insertion');
    # ensure all data is saved properly
    # dbManager.executeBulkOperations(history_collection_name); # temporary off (need 7 day data only)
    dbManager.executeBulkOperations(latest_collection_name)
    return chunks_info
def get(params):
    drug_codes = fn.getNestedElement(params, 'drug_nondrug_code', []);
    state = fn.getNestedElement(params, 'state');
    requester_group = fn.getNestedElement(params, 'requester_group');
    issue_type = fn.getNestedElement(params, 'issue_type');
    dbManager = SharedMemoryManager.getInstance();
    db = dbManager.query();
    match_query = {
        'state': state.replace('_', ' '),
    };
    if drug_codes:
        match_query['drug_nondrug_code'] = {'$in': drug_codes};
    if not requester_group == 'all' and requester_group:
        match_query['requester_group_name'] = requester_group.replace('_', ' ');
    if not issue_type == 'all' and issue_type:
        match_query['issue_type'] = issue_type.replace('_', ' ');
    data = list(db[collection_name].aggregate([
        {
            '$match': match_query,
        },
        {
            '$project': {'_id': 0, 'inserted_at': 0, 'updated_at': 0}
        }
    ]));
    data_length = len(data);
    # Logger.v('data length', data_length, data);
    return data;
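# Note on the query built above: request parameters arrive underscore-encoded and are
# converted back to spaces before matching, e.g. (hypothetical value) a state filter of
# 'pulau_pinang' matches documents stored as 'pulau pinang'; a value of 'all' (or an
# empty value) skips the requester_group / issue_type filters entirely.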
def refreshIsRequired(data, collection_name):
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    refresh_collection = False
    mongo_data = list(db[collection_name].find({}))
    # Logger.v('mongo_data', mongo_data);
    unique_values = []
    for row in data:
        obj_ = {}
        unique_value = generateUniqueValue(data=row, collection_name=collection_name)
        unique_values.append('_'.join(unique_value))
    matched_row = db[collection_name].find({'unique_value': {'$in': unique_values}})
    matched_result = list(matched_row)
    # Logger.v('matched_result', matched_result)
    if not len(matched_result) == len(mongo_data) or len(mongo_data) == 0:
        # there is a difference between mongodb and the raw data
        Logger.v('matched_result len', len(matched_result))
        Logger.v('mongo_data len', len(mongo_data))
        refresh_collection = True
        return refresh_collection
    return refresh_collection
def check(params):
    global msia_tz, date_retrieve_limit, date_count, collection_name
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    today = DateTime.now(tzinfo=msia_tz)
    start_date = DateTime.getDaysAgo(date_retrieve_limit, datefrom=today)
    durations = DateTime.getBetween([start_date, today], element='date', offset=24)['order'] # offset 24 to include today
    Logger.v('durations', durations)
    data = db[collection_name].aggregate([
        {
            '$match': {
                'state_updated_at': {'$in': durations},
                'facility_updated_at': {'$in': durations}
            }
        },
        {
            '$project': {'_id': 0, 'inserted_at': 0, 'updated_at': 0}
        }
    ])
    data = list(data)
    Logger.v('Total stock issue integrity in', date_retrieve_limit, 'days:', len(data))
    state_data = {}
    facility_data_by_state = {}
    for idx in range(0, len(data)):
        row = data[idx]
        state_code = fn.getNestedElement(row, 'state_code')
        if state_code not in facility_data_by_state:
            facility_data_by_state[state_code] = {}
        state_data = addIntegrityData(data={'row': row, 'to_update': state_data}, category='state')
        facility_data_by_state[state_code] = addIntegrityData(data={
            'row': row,
            'to_update': facility_data_by_state[state_code]
        }, category='facility')
        if date_count > date_retrieve_limit: # limit loop data / show data in N days
            break
    date_count = 0 # reset to 0th day
    return {
        'state': state_data,
        'state_facility': facility_data_by_state,
    }
def checkAvailableMonth():
    limit = 6;
    dbManager = SharedMemoryManager.getInstance();
    db = dbManager.query();
    data = list(db[collection_name].find({}, {'_id': 0, 'approved_year_month': 1}));
    df = pd.DataFrame(data);
    months = df['approved_year_month'].sort_values(ascending=False).unique().tolist();
    return months[:limit];
def reset(date):
    global collection_name, stock_issue_options;
    stock_issue_options = {};
    dbManager = SharedMemoryManager.getInstance();
    db = dbManager.query();
    query = {
        'upload_date': date,
    };
    db[collection_name].delete_many(query);
def reset(date):
    global unique_facility
    unique_facility = []
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    query = {
        'date': date,
    }
    db[collection_name].delete_many(query)
def generateIndex():
    global collection_name
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    db[collection_name].create_index([('state_updated_at', -1), ('facility_updated_at', -1)])
    db[collection_name].create_index([('state_name', 1), ('state_code', 1), ('facility_name', 1), ('facility_code', 1)])
def generateIndex():
    global latest_collection_name, history_collection_name
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    for collection_name in [latest_collection_name, history_collection_name]:
        db[collection_name].create_index([('date', -1), ('item_code', 1), ('state', 1), ('facility_code', 1), ('requester_unit_code', 1)])
        db[collection_name].create_index([('item_desc', TEXT)], default_language='english')
def refreshCollection(data, collection_name):
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    db[collection_name].delete_many({})
    for row in data:
        unique_value = generateUniqueValue(data=row, collection_name=collection_name)
        obj_ = generateKeyValue(data=row)
        obj_.update({'unique_value': '_'.join(unique_value)})
        # Logger.v('obj_', obj_);
        dbManager.addBulkInsert(collection_name, obj_, batch=True)
    dbManager.executeBulkOperations(None)
def createSchedules(args={}): # upid, page_type
    global filter_page_type
    Debug = DebugManager.DebugManager()
    Debug.start()
    dbManager = SharedMemoryManager.getInstance()
    # crawl_duration = fn.getNestedElement(fn.config,'CRAWL_DURATION', 12);
    incomplete_task, incomplete_task_count = checkRemaining()
    new_queue_count = 0
    # Logger.v(incomplete_task_count, incomplete_task, filter_page_type);
    extra_params = {
        'crawl_comment': fn.getNestedElement(args, 'crawl_comment', None)
    }
    extra_params = {k: v for k, v in extra_params.items() if v is not None}
    for platform in filter_page_type:
        if args and not platform in fn.getNestedElement(args, 'page_type', platform).split(','):
            Logger.v('Skip Platform:%s' % (platform))
            continue # skip when page_type appears and is not the same
        pages = fn.getNestedElement(args, 'pages.{0}'.format(platform), [])
        Logger.v('platform', platform)
        # Logger.v('page', args['pages']['budget']);
        for page in pages: # create a queue entry for each page
            # Logger.v('page', page);
            Queue.create(page, extra_params=extra_params, priority=fn.getNestedElement(args, 'priority', 'daily'), batch=True)
            new_queue_count += 1
        Logger.v('new_queue_count', new_queue_count)
    # Debug.trace();
    Logger.v('Incomplete:%s, New Queue: %s' % (incomplete_task_count, new_queue_count))
    if incomplete_task_count > (new_queue_count * int(fn.config['DEBUG_CRAWL_WARNING']) / 100) or incomplete_task_count > int(fn.config['DEBUG_CRAWL_WARNING']):
        # Mail.send('[%s]Incomplete Crawl [%s], Current Schedule: [%s]'%(DateTime.getReadableDate(DateTime.now()),
        #     incomplete_task_count, new_queue_count),
        #     fn.dumps(incomplete_task, encode=False)
        # );
        pass
    result = {
        'pending_count': new_queue_count,
        'incomplete_count': incomplete_task_count
    }
    dbManager.executeBulkOperations(None)
    # Debug.show('Create Schedule');
    return Params.generate(True, result)
def accept_conn(s):
    sharedManager = SharedMemoryManager.Manager()
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query(index=False)
    dbManager.close()
    while True:
        try:
            # accept and receive socket connection
            conn, addr = s.accept()
            _thread.start_new_thread(process, (conn, addr, dbManager))
        except Exception as ex:
            print(ex)
            traceback.print_exc()
def getPath(params):
    global upload_log_collection_name
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    date = fn.getNestedElement(params, 'date')
    path = fn.getNestedElement(params, 'path')
    group = fn.getNestedElement(params, 'group')
    data_part = fn.getNestedElement(params, 'data_part', 'default')
    if type(group) == str:
        group = group.lower()
    if type(data_part) == str:
        data_part = data_part.lower()
    query = {
        'date': date,
        'collection': group,
    }
    stock_upload_log = list(db[upload_log_collection_name].find(query, {'_id': 0, 'inserted_at': 0, 'updated_at': 0}))
    # Logger.v('stock_upload_log', stock_upload_log);
    part_of_the_day = []
    if stock_upload_log:
        part_of_the_day = stock_upload_log[0]['part_of_the_day']
    paths = []
    for part in part_of_the_day:
        paths.append(stock_upload_log[0][part])
    updateLog(params)
    should_reset = True
    if not data_part in part_of_the_day:
        should_reset = False if part_of_the_day else True
        paths = [path]
    Logger.v('Upload paths:', paths, should_reset)
    return paths, should_reset
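# Behaviour sketch for getPath (part names are hypothetical): the first part uploaded
# for a date (e.g. data_part='am') is not yet in part_of_the_day, so it returns
# ([path], True) and the caller resets the collection; re-uploading an already-logged
# part returns every previously logged path with should_reset=True so the whole day is
# re-imported; a new second part (e.g. 'pm') returns ([path], False) and is appended
# without a reset.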
def updateLog(params):
    global upload_log_collection_name
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    date = fn.getNestedElement(params, 'date')
    path = fn.getNestedElement(params, 'path')
    group = fn.getNestedElement(params, 'group')
    data_part = fn.getNestedElement(params, 'data_part', 'default')
    if type(group) == str:
        group = group.lower()
    if type(data_part) == str:
        data_part = data_part.lower()
    query = {
        'date': date,
        'collection': group,
    }
    stock_upload_log = list(db[upload_log_collection_name].find(query, {'_id': 0, 'inserted_at': 0, 'updated_at': 0}))
    # Logger.v('stock_upload_log', stock_upload_log);
    if not stock_upload_log and group == 'stock':
        # Logger.v('backdate collection');
        ModelStock.backdateCollection()
    # Logger.v('update upload_log collection');
    values = {}
    if stock_upload_log:
        if 'part_of_the_day' not in values:
            values['part_of_the_day'] = []
        for part in stock_upload_log[0]['part_of_the_day']:
            # Logger.v('part', part);
            values['part_of_the_day'].append(part)
            values[part] = stock_upload_log[0][part]
        if data_part not in stock_upload_log[0]['part_of_the_day']:
            values['part_of_the_day'].append(data_part)
            values[data_part] = path
    else:
        values['part_of_the_day'] = [data_part]
        values[data_part] = path
    # Logger.v('query', query, values)
    # exit();
    dbManager.addBulkUpdate(upload_log_collection_name, query, values, upsert=True, batch=False)
def checkEmpty(params):
    global global_check_data;
    dbManager = SharedMemoryManager.getInstance();
    db = dbManager.query();
    custom_params = copy.deepcopy(params);
    report_keys = fn.getNestedElement(params, 'keys.report', ['procurement', 'budget']);
    interval = fn.getNestedElement(params, 'interval', 1);
    past_dates = DateTime.getPastDate(count=12, duration=interval); # check previous 12 month data
    year = Crawl.extractYear(data=past_dates[0]);
    first_date = past_dates[0][-1][0];
    last_date = past_dates[0][0][1];
    # Logger.v('first_date', first_date, 'last_date', last_date);
    state_by = 'state_code';
    states = list(db['state'].find({}, {'_id': 0, state_by: 1}));
    result = {};
    datetime = DateTime.toString(DateTime.now(tzinfo=msia_tz), date_format='%Y-%m-%d-%H-%M-%S');
    custom_params['first_date'] = first_date;
    custom_params['last_date'] = last_date;
    custom_params['state_by'] = state_by;
    custom_params['states'] = states;
    temp_result = generateTemplate(params=custom_params);
    for rk in report_keys:
        if rk not in global_check_data:
            global_check_data[rk] = [];
        for y in year:
            root_path = '{0}/{1}/year_{2}'.format(crawl_folder, rk, y);
            openDir(root_path, rk);
        for gcd in global_check_data[rk]:
            date = gcd.split('_')[0];
            state = gcd.split('_')[1];
            if DateTime.inrange(date, [first_date, last_date]):
                try:
                    temp_result[rk][date][state] += 1;
                except Exception as e:
                    # Logger.v('Main.checkEmpty:', e);
                    pass;
    for rk in temp_result:
        if rk not in result:
            result[rk] = [];
        for date in temp_result[rk]:
            result[rk].append(temp_result[rk][date]);
        filename = '{0}/{1}_check_moh_empty'.format(test_folder, rk);
        # filename = 'tests/{0}_{1}_check_moh_empty'.format(rk, datetime);
        fn.writeExcelFile(filename=filename, data=result[rk]);
    global_check_data = {};
    return result;
def retrieveOption(collection_name='state', show_keys=[], hide_keys=[]):
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    return_keys = {}
    for key in show_keys:
        return_keys.update({key: 1})
    for key in hide_keys:
        return_keys.update({key: 0})
    # Logger.v('return_keys', return_keys);
    if return_keys:
        result = list(db[collection_name].find({}, return_keys))
    else:
        result = list(db[collection_name].find({}))
    return result
def get(params):
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    data = []
    getCollectionName(params)
    query = getQuery(params)
    Logger.v('Get data from collection:', latest_collection_name)
    data = list(db[latest_collection_name].aggregate([
        {
            '$match': query
        },
        {
            '$project': {'_id': 0, 'inserted_at': 0, 'updated_at': 0}
        }
    ]))
    Logger.v('data length', len(data))
    return data
def getCollectionName(params):
    global latest_collection_name
    latest_collection_name = 'stock_latest' # set default;
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    data = list(db[latest_collection_name].find({}, {'_id': 0, 'date': 1}).limit(1))
    if data:
        latest_date_string = DateTime.toString(data[0]['date'])
        latest_date = DateTime.convertDateTimeFromString(latest_date_string)
        date_string = fn.getNestedElement(params, 'date', None)
        if date_string:
            date = DateTime.convertDateTimeFromString(date_string)
            different = latest_date - date
            day_diff = math.floor(different.total_seconds() / float(86400))
            if day_diff > 0:
                latest_collection_name = 'stock_{0}'.format(day_diff)
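# Minimal standalone sketch (illustrative only) of the collection-name resolution in
# getCollectionName above: a requested date N whole days behind the latest upload maps
# to 'stock_N', while the latest upload itself stays in 'stock_latest'.
import math
from datetime import datetime

def _example_resolve_stock_collection(latest_date: datetime, requested_date: datetime) -> str:
    # Whole days between the latest upload and the requested date.
    day_diff = math.floor((latest_date - requested_date).total_seconds() / 86400.0)
    return 'stock_{0}'.format(day_diff) if day_diff > 0 else 'stock_latest'

# _example_resolve_stock_collection(datetime(2020, 3, 15), datetime(2020, 3, 13)) -> 'stock_2'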
def backdateCollection(days=date_retrieve_limit):
    global latest_collection_name
    dbManager = SharedMemoryManager.getInstance()
    for idx in range(days, 0, -1):
        collection_names = dbManager.getCollectionNames()
        col_name = 'stock_{0}'.format(idx)
        if idx > 1:
            previous_col_name = 'stock_{0}'.format(idx - 1)
        else:
            previous_col_name = latest_collection_name
        if col_name in collection_names:
            dbManager.dropCollection(col_name)
        if previous_col_name in collection_names:
            Logger.v('rename', previous_col_name, 'to', col_name)
            dbManager.renameCollection(previous_col_name, col_name)
        else:
            Logger.v('create', col_name)
            dbManager.createCollection(col_name)
    dbManager.createCollection(latest_collection_name)
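# Illustrative only: with days == 7 the cascade above drops 'stock_7', then renames
# 'stock_6' -> 'stock_7', 'stock_5' -> 'stock_6', ..., 'stock_latest' -> 'stock_1'
# (creating any collection that does not exist yet), and finally recreates an empty
# 'stock_latest' to receive the next upload.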
def getData(params):
    report_name = Report.getReportName(params)
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    limit_data = 5000 # 5000 rows take roughly 30 seconds to load
    query = generateQuery(params=params)
    # Logger.v('query', query);
    data = list(db[report_name].find(query, {'_id': 0}))[:limit_data] # direct load from mongodb
    # data = File.readJson('crawled_data/testing_purpose/{0}.json'.format(report_name)); # TEST using demo data
    # data = File.readJson('crawled_data/testing_purpose/pivot_{0}.json'.format(report_name)); # TEST using pivot data
    # lambda_function = lambda d: d['ptj_code'] in ['010619'];
    # data = list(filter(lambda_function, data));
    # fn.writeExcelFile(filename='crawled_data/testing_purpose/pivot_{0}'.format(report_name), data=data);
    # fn.writeJSONFile(filename='crawled_data/testing_purpose/pivot_{0}.json'.format(report_name), data=data);
    Logger.v('data length', len(data))
    return data
def generateCrawlParam(params):
    Debug = DebugManager.DebugManager()
    Debug.start()
    global pass_month_quantity
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    crawl_params = {}
    limit_for_test = 10
    report_keys = fn.getNestedElement(params, 'keys.report', ['budget', 'procurement'])
    interval = fn.getNestedElement(params, 'interval', 1)
    filter_facility_code = fn.getNestedElement(params, 'filter.facility_code', True)
    check_empty = fn.getNestedElement(params, 'schedule_params.check_empty', False)
    today = fn.getNestedElement(params, 'schedule_params.today', DateTime.toString(DateTime.now(tzinfo=msia_tz)))
    # Logger.v('filter_facility_code', filter_facility_code);
    if check_empty:
        # past_dates = DateTime.getPastDate(count=pass_month_quantity, duration=interval);
        past_dates = DateTime.getPastDate(count=pass_month_quantity, duration=interval, end=DateTime.convertDateTimeFromString(today))
        # Logger.v('past_dates', past_dates);
        # exit();
    else:
        past_dates = DateTime.getPastDate(count=pass_month_quantity, duration=interval)
    # Logger.v('past_dates', past_dates);
    state_codes = retrieveOption(collection_name='state', show_keys=['state_code'], hide_keys=['_id'])
    state_code = extractListByKey(data=state_codes, key='state_code')
    facility_codes = retrieveOption(collection_name='facility', show_keys=['facility_code'], hide_keys=['_id'])
    facility_code = extractListByKey(data=facility_codes, key='facility_code')
    for key in report_keys:
        # Logger.v('collection', key, past_dates[0]);
        Debug.trace()
        if key not in crawl_params:
            crawl_params[key] = []
        mongo_data = list(db[key].find({}, {}))
        if len(mongo_data) == 0:
            dates = past_dates[0][:]
        else:
            dates = past_dates[0][:1]
        year = extractYear(data=dates)
        # Logger.v('year', year);
        # Logger.v('filter_facility_code', filter_facility_code);
        if key == 'budget':
            if not filter_facility_code:
                iteration = 0
                total = len(year) * len(state_code)
                # fn.printProgressBar(iteration=iteration, total=total);
                for y in year:
                    for sc in state_code:
                        obj_ = {
                            'financial_year': y,
                            'state_code': sc,
                            'page_type': key,
                            'upid': '_'.join([sc, y]),
                            'url': api_links[key].format(sc, y, ''),
                            'start_date': today,
                            'end_date': today,
                        }
                        if obj_ not in crawl_params[key]:
                            crawl_params[key].append(obj_)
                        # Logger.v('len(crawl_param])', len(crawl_params[key]));
                        iteration += 1
                        # fn.printProgressBar(iteration=iteration, total=total);
            else:
                iteration = 0
                total = len(year) * len(state_code) * len(facility_code[:limit_for_test])
                # fn.printProgressBar(iteration=iteration, total=total);
                for y in year:
                    for sc in state_code:
                        for fc in facility_code[:limit_for_test]:
                            obj_ = {
                                'financial_year': y,
                                'state_code': sc,
                                'page_type': key,
                                'upid': '_'.join([sc, y, fc]),
                                'facility_code': fc,
                                'url': api_links[key].format(sc, y, fc),
                                'start_date': today,
                                'end_date': today,
                            }
                            if obj_ not in crawl_params[key]:
                                crawl_params[key].append(obj_)
                            # Logger.v('len(crawl_param])', len(crawl_params[key]));
                            iteration += 1
                            # fn.printProgressBar(iteration=iteration, total=total);
        elif key == 'procurement':
            if not filter_facility_code:
                for past_duration in dates:
                    start_date = DateTime.toString(DateTime.getDaysAgo(days_to_crawl=-1, datefrom=past_duration[0]))
                    end_date = DateTime.toString(DateTime.getDaysAgo(days_to_crawl=1, datefrom=past_duration[1]))
                    for sc in state_code:
                        obj_ = {
                            'state_code': sc,
                            'start_date': start_date,
                            'end_date': end_date,
                            'page_type': key,
                            'upid': '_'.join([sc, start_date, end_date]),
                            'url': api_links[key].format(sc, start_date.replace('-', ''), end_date.replace('-', ''), ''),
                        }
                        if obj_ not in crawl_params[key]:
                            crawl_params[key].append(obj_)
                        # Logger.v('len(crawl_param])', len(crawl_params[key]));
            else:
                for past_duration in dates:
                    start_date = DateTime.toString(DateTime.getDaysAgo(days_to_crawl=-1, datefrom=past_duration[0]))
                    end_date = DateTime.toString(DateTime.getDaysAgo(days_to_crawl=1, datefrom=past_duration[1]))
                    for sc in state_code:
                        for fc in facility_code[:limit_for_test]:
                            obj_ = {
                                'state_code': sc,
                                'start_date': start_date,
                                'end_date': end_date,
                                'page_type': key,
                                'facility_code': fc,
                                'upid': '_'.join([sc, start_date, end_date, fc]),
                                'url': api_links[key].format(sc, start_date.replace('-', ''), end_date.replace('-', ''), fc)
                            }
                            if obj_ not in crawl_params[key]:
                                crawl_params[key].append(obj_)
                            # Logger.v('len(crawl_param])', len(crawl_params[key]));
    for c in crawl_params:
        # Logger.v('crawl_params', c, len(crawl_params[c]));
        fn.writeExcelFile(filename='{0}/{1}'.format(test_folder, c), data=crawl_params[c])
    Logger.v('crawl_params', len(crawl_params))
    Debug.show('Generate Crawl Params')
    return crawl_params
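# Example of the crawl identifiers built above (codes and dates are hypothetical): a
# budget job keyed per state/year/facility gets upid '01_2020_F0001', while a
# procurement job keyed per state and date window gets upid
# '01_2020-01-01_2020-01-31'; obj_ is only appended when an identical parameter set is
# not already queued.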
def getDropdownList(params):
    result = {};
    dbManager = SharedMemoryManager.getInstance();
    db = dbManager.query();
    custom_params = copy.deepcopy(params);
    for key in ['state', 'ptj', 'facility', 'facility_type']:
        custom_params['key'] = key;
        # read from file
        # filename = '{0}/{1}/2020-02-28.json'.format(main_folder, key);
        # data = File.readLatestFile(directory='/'.join([main_folder, key]), extension='.json');
        # read from mongodb
        try:
            data = list(db[key].find({}, {'_id': 0}));
        except Exception as ex:
            Logger.v(ex);
            data = File.readLatestFile(directory='/'.join([main_folder, key]), extension='.json');
        # Logger.v('data', data);
        accessible_data = getAccessibleData(params=custom_params, data=data);
        result[key] = organiseStructure(data=accessible_data, key=key);
    result['duration'] = [
        {
            'id': 'yearly',
            'name': 'Yearly',
        },
        {
            'id': 'monthly',
            'name': 'Monthly',
        },
    ];
    result['year'] = [
        {
            'id': 2020,
            'name': '2020',
        },
        {
            'id': 2019,
            'name': '2019',
        },
    ];
    result['procurement_type'] = [
        {
            'id': 'type1',
            'name': 'Type 1',
        },
        {
            'id': 'type2',
            'name': 'Type 2',
        },
    ];
    result['budget_type'] = [
        {
            'id': 'db',
            'name': 'Dasar Baru',
        },
        {
            'id': 'oo',
            'name': 'One Off',
        },
    ];
    return Params.generate(True, result);
def getIntegrity(params, data):
    dbManager = SharedMemoryManager.getInstance()
    db = dbManager.query()
    check_data = fn.getNestedElement(data, 'check_data')
    facility = ModelFacility.getActiveFacility()
    filter_key = fn.getNestedElement(params, 'filter_key')
    durations = fn.getNestedElement(params, 'durations')
    result = fn.getNestedElement(data, 'to_update')
    state_data = fn.getNestedElement(data, 'state')
    facility_data_by_state = fn.getNestedElement(data, 'facility')
    data_list = getFacilityByState(params=params, data=check_data)
    for key in data_list:
        row = fn.getNestedElement(data_list, key)
        count = getTotalCount(params={
            'filter_key': filter_key,
            'key': key
        }, data={
            'row': row,
            'facility': facility
        })
        obj_ = {
            'id': fn.convertToSnakecase(fn.getNestedElement(row, 'id')),
            'name': fn.getNestedElement(row, 'name'),
            'code': fn.getNestedElement(row, 'code'),
            'data': [],
        }
        for idx in range(len(durations) - 1, -1, -1):
            date = durations[idx]
            previous_date = DateTime.toString(DateTime.getDaysAgo(1, datefrom=date))
            # Logger.v('date', date, 'previous_date', previous_date);
            if filter_key:
                date_count = fn.getNestedElement(facility_data_by_state, '{0}.{1}.{2}'.format(filter_key, key, date), 0)
                if not date_count:
                    date_count = 0
            else:
                date_count = 0 # do not include those positive, count missing facility quantity only
                # date_count = fn.getNestedElement(state_data, '{0}.{1}'.format(key, date), 0);
            if filter_key:
                val = date_count - count
            else:
                val = 0
            obj_['data'].append({
                previous_date: val, # negative value is missing, 0 means complete, positive value is not found in the user-uploaded facilities
            })
        if filter_key:
            # Logger.v('recursive end')
            pass
        else:
            obj_['facility'] = []
            obj_['facility'] = getIntegrity(params={
                'filter_key': key,
                'durations': durations,
            }, data={
                'state': state_data,
                'facility': facility_data_by_state,
                'to_update': obj_['facility'],
                'check_data': check_data,
            })
        result.append(obj_)
    # Logger.v('result', result)
    return result
def generateIndex():
    global collection_name;
    dbManager = SharedMemoryManager.getInstance();
    db = dbManager.query();
    db[collection_name].create_index([('approved_year_month', -1), ('drug_nondrug_code', 1), ('state_name', 1), ('facility_code', 1), ('requester_group_name', 1)]);