def update_docs(self, df, mongo, collection_name):
    bulk_write_list = []
    for i in df.index:
        row_record = json.loads(df.loc[i].to_json())
        index_str = row_record.get('CS_INDEX', None)
        if index_str is None:
            # no CS_INDEX: upsert the whole row as-is
            bulk_write_list.append(
                UpdateOne(row_record, {'$set': row_record}, upsert=True))
        elif str(index_str).isdigit() and int(index_str) == i:
            # update the existing document matched on CS_INDEX, or insert it
            del row_record['CS_INDEX']
            bulk_write_list.append(
                UpdateOne({'CS_INDEX': int(index_str)},
                          {'$set': row_record}, upsert=True))
        if len(bulk_write_list) == 1000:
            # flush a full bucket to keep memory bounded
            result = mongo[collection_name].bulk_write(
                bulk_write_list, ordered=True)
            LOG.info('big csv update -> {0}'.format(result.bulk_api_result))
            bulk_write_list = []
    if bulk_write_list:
        result = mongo[collection_name].bulk_write(
            bulk_write_list, ordered=True)
        LOG.info('end of collection update from csv -> {0}'.format(
            result.bulk_api_result))
def auth_user(flask_bcrypt):
    ''' auth endpoint '''
    data = validate_user(request.get_json())
    if data['ok']:
        data = data['data']
        user = mongo.users.find_one({'email': data['email']}, {'_id': 0})
        LOG.debug(user)
        if user and flask_bcrypt.check_password_hash(user['password'],
                                                     data['password']):
            # never return the password hash to the client
            del user['password']
            access_token = create_access_token(identity=data)
            refresh_token = create_refresh_token(identity=data)
            user['token'] = access_token
            user['refresh'] = refresh_token
            return jsonify({'ok': True, 'data': user}), 200
        return jsonify({
            'ok': False,
            'message': 'invalid username or password'
        }), 401
    return jsonify({
        'ok': False,
        'message': 'Bad request parameters: {}'.format(data['message'])
    }), 400
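# A hypothetical client call against the endpoint above, for illustration
# only; the '/auth' route and host are assumptions, not taken from this
# section.
import requests

resp = requests.post('http://localhost:5000/auth',
                     json={'email': 'user@example.com', 'password': 'secret'})
if resp.status_code == 200:
    body = resp.json()['data']
    # subsequent requests would send: Authorization: Bearer <body['token']>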
def save_quandl_euronext(self, ticker, valid_data):
    try:
        error = 0
        df = pd.DataFrame(valid_data.items())
        coll_name = ticker.replace('/', '_')
        self.mongo[coll_name].create_index(
            [('CS_INDEX', ASCENDING)], name='CS_INDEX', unique=True)
        # pivot the two-column frame into {label: {date: value}} form
        valid_json = {}
        for i in df.index:
            row_record = json.loads(df.loc[i].to_json())
            valid_json[row_record['0']] = row_record['1']
        # iterate OPEN dates from most recent to oldest
        open_list = sorted(valid_json['OPEN'].items(),
                           key=lambda x: x[0], reverse=True)
        bulk_write_list = []
        for date, value in open_list:
            to_save = self.get_data_to_save(date, value, ticker, valid_json)
            bulk_write_list.append(InsertOne(to_save))
            # flush in buckets of 1000 to keep memory bounded
            bulk_write_list = self.write_bucket(
                len(bulk_write_list) == 1000,
                self.mongo[coll_name], bulk_write_list)
        bulk_write_list = self.write_bucket(
            len(bulk_write_list) > 0, self.mongo[coll_name], bulk_write_list)
    except Exception as e:
        error = e
        LOG.critical('-> Failed to save Quandl data => {0}'.format(e))
    finally:
        if error == 0:
            LOG.info(colored(
                '-> Quandl data successfully extracted: {0}'.format(ticker),
                'green'))
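# save_quandl_euronext relies on a write_bucket helper that is not shown in
# this section. A minimal sketch of what it plausibly does, assuming it
# flushes the pending batch whenever the condition holds and hands back an
# empty list, otherwise returns the batch untouched:
def write_bucket(self, should_flush, collection, bulk_write_list):
    if should_flush and bulk_write_list:
        result = collection.bulk_write(bulk_write_list, ordered=True)
        LOG.info('bulk write -> {0}'.format(result.bulk_api_result))
        return []
    return bulk_write_list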
def get_clean_csv_df(self):
    missing_values = ['n/a', 'na', '--', 'Null', None, '']
    try:
        df_with_unnamed = pd.read_csv(
            self.file_path, encoding='utf-8', na_values=missing_values)
        df_with_unnamed.columns = extractor.keys_replacement(df_with_unnamed)
        # drop pandas' auto-generated 'Unnamed: N' columns
        df_with_nan_values = df_with_unnamed.loc[
            :, ~df_with_unnamed.columns.str.contains('^Unnamed')]
        # .copy() avoids SettingWithCopyWarning when adding CS_INDEX below
        valid_df = df_with_nan_values.dropna(axis=0, how='any').copy()
        valid_df['CS_INDEX'] = list(range(len(valid_df.index)))
        LOG.info('final csv keys: {0}'.format(valid_df.columns.values))
        return valid_df
    except Exception as e:
        LOG.critical('Failed getting dataFrame from csv: {0}'.format(e))
        return None
def on_any_event(event):
    print('===================FileHandler===================')
    if event.is_directory:
        print('Folder change detected - {0}'.format(event.src_path))
        print('===================================================')
        return None
    print('File change detected - {0}'.format(event.src_path))
    print('Check path is file ===> {0}'.format(
        Path(event.src_path).is_file()))
    print('===================================================')
    if event.event_type in ('created', 'modified'):
        LOG.info('File created or modified - {0}'.format(event.src_path))
        process_file(event.src_path)
    if event.event_type == 'deleted':
        LOG.critical('File deleted - {0}'.format(event.src_path))
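# A minimal sketch of how this handler might be wired into watchdog's
# Observer. The FileHandler class name comes from the banner printed above;
# treating on_any_event as one of its static methods and the WATCH_PATH
# value are assumptions for illustration.
import time
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

class FileHandler(FileSystemEventHandler):
    on_any_event = staticmethod(on_any_event)

WATCH_PATH = './data_sources'  # hypothetical watch directory

observer = Observer()
observer.schedule(FileHandler(), WATCH_PATH, recursive=True)
observer.start()
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    observer.stop()
observer.join()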
def get_clean_xl_df(self, sheet):
    try:
        # these sheets carry a title row that must be skipped
        skip_rows = 1 if sheet in ['ENTITIES', 'SCENARIOS'] else 0
        try:
            df_with_unnamed = pd.read_excel(
                self.file_path, sheet_name=sheet,
                skiprows=skip_rows, encoding='utf-8')
        except Exception as e:
            LOG.critical('Failed getting dataFrame: {0}'.format(e))
            return None
        df_with_unnamed.columns = extractor.keys_replacement(df_with_unnamed)
        # drop pandas' auto-generated 'Unnamed: N' columns
        df_with_nan_values = df_with_unnamed.loc[
            :, ~df_with_unnamed.columns.str.contains('^Unnamed')]
        valid_df = df_with_nan_values.dropna(axis=1, how='any').copy()
        valid_df['CS_INDEX'] = list(range(len(valid_df.index)))
        LOG.info('final keys: {0}'.format(valid_df.columns.values))
        return valid_df
    except Exception as e:
        LOG.critical('Failed cleaning dataFrame from excel: {0}'.format(e))
        return None
def process_file(file_path):
    p = Path(file_path)
    if p.is_file():
        ext = p.suffix
        if not file_needs_processing(file_path, ext):
            LOG.info('file not compatible... not saving')
            return
        LOG.info(colored('--------------- Processing => ' + file_path,
                         'blue'))
        try:
            parse_and_save_file(ext, file_path)
        except (OSError, IOError) as e:
            LOG.critical(e)
def parse_and_save_file(ext, file_path):
    try:
        error = 0
        if ext in ['.xls', '.xlsx']:
            xl_saver = XlSaver(file_path)
            xl_saver.handle_excel()
        elif ext == '.csv':
            cs_saver = CsvSaver(file_path)
            cs_saver.handle_csv()
        elif ext == '.txt' and 'data_sources/mt940' in file_path:
            mt940_saver = SwiftSaver(file_path)
            mt940_saver.handle_mt940()
    except Exception as e:
        error = e
        LOG.critical('Failed to save File: {0}'.format(file_path))
        LOG.critical('ETM_SAVE_FILE_ERROR -> {0}'.format(e))
        raise SaveFileError()
    finally:
        if error == 0:
            LOG.info(colored(
                '--------------- File successfully processed without '
                'uncaught exceptions :) => ' + file_path, 'yellow'))
def handle_csv(self):
    collection_name = None
    records = None
    error = 0
    try:
        df = self.get_clean_csv_df()
        if df is None:
            error = 1
            raise Exception('could not build a clean DataFrame from csv')
        collection_name = extractor.get_valid_string(
            Path(self.file_path).stem)
        records = json.loads(df.T.to_json())
        mongo = db_manager.get_client_instance_with_db_name(self.db_name)
        if db_manager.coll_exists(self.db_name, collection_name):
            LOG.info('attempting to update collection -> {0}'.format(
                collection_name))
            try:
                self.update_docs(df, mongo, collection_name)
            except BulkWriteError as bwe:
                LOG.critical(bwe.details)
        else:
            LOG.info('attempting to create collection -> {0}'.format(
                collection_name))
            if 'CS_INDEX' in df.columns.values:
                mongo[collection_name].create_index(
                    [('CS_INDEX', ASCENDING)], name='CS_INDEX', unique=True)
            mongo[collection_name].insert_many(list(records.values()))
    except Exception as e:
        error = e
        if collection_name is not None:
            LOG.critical('failed to save to collection: {0}'.format(
                collection_name))
        if records is not None:
            LOG.critical('-> data not saved !!!! => {0}'.format(
                json.dumps(records, indent=4, sort_keys=True)))
        LOG.critical('ETM_CSV_ERROR -> {0}'.format(e))
    finally:
        if error == 0 and collection_name is not None:
            LOG.info(colored(
                '--------------- CSV successfully saved in ' + self.db_name +
                ' database to collection: ' + collection_name +
                ' --- \\o/ supa dupa dope !!! :) ', 'green'))
def not_found(error):
    ''' 404 handler: log the error and serve the static 404 page '''
    LOG.error(error)
    # the explicit 404 matters: Flask error handlers otherwise return 200
    return send_from_directory(PUBLIC_PATH, 'resources/404.html'), 404
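# Hypothetical registration of the handler above; `app` is assumed to be
# the Flask application instance.
app.register_error_handler(404, not_found)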
def handle_mt940(self):
    collection_name = None
    transactions = None
    try:
        error = 0
        collection_name = extractor.get_valid_string(
            Path(self.file_path).stem)
        transactions = mt940.parse(self.file_path)
        if transactions.data is not None and len(transactions.data) != 0:
            # round-trip through the mt940 JSONEncoder to get plain types
            records = json.loads(
                json.dumps(transactions.data, cls=mt940.JSONEncoder))
            mongo = db_manager.get_client_instance_with_db_name(self.db_name)
            LOG.info('attempting to create collection -> {0}'.format(
                collection_name))
            LOG.info('saving mt940 format -> {0}'.format(records))
            mongo[collection_name].insert_one(records)
        else:
            error = 1
            LOG.critical(
                'mt-940 library failed to parse file (custom parsing '
                'needed) => file not saved !!!!! :=> {0}'.format(
                    self.file_path))
    except Exception as e:
        error = e
        LOG.critical('failed to save to collection: {0}'.format(
            collection_name))
        if transactions is not None and extractor.is_valid_json(
                transactions.data):
            LOG.critical('-> data not saved !!!! => {0}'.format(
                json.dumps(transactions.data, indent=4, sort_keys=True,
                           cls=mt940.JSONEncoder)))
        LOG.critical('ETM_MT940_ERROR -> {0}'.format(error))
        raise SwiftSaverFileError(e)
    finally:
        if error == 0:
            LOG.info(colored(
                '--------------- mt940 successfully saved in ' +
                self.db_name + ' database to collection: ' +
                collection_name + ' --- \\o/ supa dupa dope !!! :) ',
                'green'))
def handle_excel(self):
    xl_file = pd.ExcelFile(self.file_path)
    sheets_array = xl_file.sheet_names
    LOG.info('sheets_array => {0}'.format(sheets_array))
    # every sheet becomes its own collection
    for sheet in sheets_array:
        self.save_sheet_to_db(sheet)
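# Hypothetical driver mirroring the dispatch in parse_and_save_file above;
# the workbook path is illustrative.
xl_saver = XlSaver('./data_sources/example.xlsx')
xl_saver.handle_excel()  # one save_sheet_to_db call per sheet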