Code example #1
0
 def update_docs(self, df, mongo, collection_name):
     """Upsert every row of *df* into ``mongo[collection_name]`` in batches.

     Rows whose ``CS_INDEX`` is a digit string matching their positional
     index are matched on that index; rows without ``CS_INDEX`` are matched
     on their full content.  Operations are flushed in ordered bulks of
     1000 to keep memory bounded.

     :param df: pandas DataFrame whose rows become Mongo documents.
     :param mongo: Mongo database handle (collections accessed by name).
     :param collection_name: name of the target collection.
     """
     bulk_write_list = []
     for i in df.index:
         # round-trip through JSON so pandas/numpy scalars become plain types
         row_record = json.loads(df.loc[i].to_json())
         index_str = row_record.get('CS_INDEX', None)
         if index_str is None:
             # no index column: match on the whole document (upsert appends)
             bulk_write_list.append(
                 UpdateOne(row_record, {'$set': row_record}, upsert=True))
         elif str(index_str).isdigit() and int(index_str) == i:
             # BUGFIX: the original did `del dict(row_record)['CS_INDEX']`,
             # which removed the key from a throwaway copy (a no-op).
             # Delete it from the real record so $set does not duplicate
             # the filter key.
             del row_record['CS_INDEX']
             bulk_write_list.append(
                 UpdateOne({'CS_INDEX': int(index_str)},
                           {'$set': row_record}, upsert=True))
         if len(bulk_write_list) == 1000:
             # flush a full bucket
             result = mongo[collection_name].bulk_write(
                 bulk_write_list, ordered=True)
             LOG.info(
                 "big csv update -> {0}".format(result.bulk_api_result))
             bulk_write_list = []
     if bulk_write_list:
         # flush the final partial bucket
         result = mongo[collection_name].bulk_write(
             bulk_write_list, ordered=True)
         LOG.info(
             "end of collection update from csv -> {0}".format(result.bulk_api_result))
Code example #2
0
def auth_user(flask_bcrypt):
    """Authenticate a user from the JSON request body.

    Returns a (json, status) pair: 200 with the user record plus JWT
    access/refresh tokens on success, 401 on bad credentials, 400 when the
    request payload fails validation.
    """
    validation = validate_user(request.get_json())
    if not validation['ok']:
        # payload failed schema validation
        return jsonify({
            'ok': False,
            'message': 'Bad request parameters: {}'.format(validation['message'])
        }), 400
    credentials = validation['data']
    user = mongo.users.find_one({'email': credentials['email']}, {"_id": 0})
    LOG.debug(user)
    # short-circuits: hash check only runs when the user exists
    password_ok = bool(user) and flask_bcrypt.check_password_hash(
        user['password'], credentials['password'])
    if not password_ok:
        return jsonify({
            'ok': False,
            'message': 'invalid username or password'
        }), 401
    # never echo the password hash back to the client
    del user['password']
    user['token'] = create_access_token(identity=credentials)
    user['refresh'] = create_refresh_token(identity=credentials)
    return jsonify({'ok': True, 'data': user}), 200
Code example #3
0
 def save_quandl_euronext(self, ticker, valid_data):
     """Persist Quandl/Euronext time-series data for *ticker* into Mongo.

     Builds one document per date of the 'OPEN' series (newest first) and
     writes them in buckets of 1000 via ``self.write_bucket``.  Failures
     are logged as critical; success is logged from ``finally``.
     """
     try:
         error = 0
         frame = pd.DataFrame(valid_data.items())
         coll_name = ticker.replace("/", "_")
         # unique index so repeated extractions cannot duplicate rows
         self.mongo[coll_name].create_index(
                   [('CS_INDEX', ASCENDING)], name='CS_INDEX', unique=True)
         valid_json = {}
         for idx in frame.index:
             record = json.loads(frame.loc[idx].to_json())
             valid_json[record['0']] = record['1']
         # (date, value) pairs sorted newest first
         open_list = sorted(valid_json['OPEN'].items(),
                            key=lambda pair: pair[0], reverse=True)
         bulk_write_list = []
         for date, value in open_list:
             to_save = self.get_data_to_save(
                 date, value, ticker, valid_json)
             bulk_write_list.append(InsertOne(to_save))
             # write_bucket flushes and returns [] when the flag is true
             bulk_write_list = self.write_bucket(
                 len(bulk_write_list) == 1000,
                 self.mongo[coll_name], bulk_write_list)
         # flush whatever remains
         bulk_write_list = self.write_bucket(
             len(bulk_write_list) > 0, self.mongo[coll_name], bulk_write_list)
     except Exception as e:
         error = e
         LOG.critical('-> Failed to save Quandl data  => {0}'.format(e))
     finally:
         if error == 0:
             LOG.info(
                 colored('-> Quandl data successfully extracted: {0}'.format(ticker), 'green'))
Code example #4
0
 def get_clean_csv_df(self):
     """Load ``self.file_path`` as CSV and return a cleaned DataFrame.

     Cleaning: normalize column names via ``extractor.keys_replacement``,
     drop auto-generated ``Unnamed`` columns, drop rows containing any NaN,
     then append a sequential ``CS_INDEX`` column (used elsewhere as a
     unique Mongo index).

     :returns: cleaned DataFrame, or ``None`` when reading/cleaning fails.
     """
     missing_values = ["n/a", "na", "--", 'Null', None, ""]
     try:
         df_with_unnamed = pd.read_csv(
             self.file_path, encoding='utf-8', na_values=missing_values)
         df_with_unnamed.columns = extractor.keys_replacement(
             df_with_unnamed)
         df_with_nan_values = df_with_unnamed.loc[:, ~df_with_unnamed.columns.str.contains('^Unnamed')]  # noqa
         # CONSISTENCY FIX: .copy() matches get_clean_xl_df and prevents
         # pandas SettingWithCopyWarning when CS_INDEX is assigned below
         # on a frame derived from a .loc column slice.
         valid_df = df_with_nan_values.dropna(axis=0, how='any').copy()
         valid_df['CS_INDEX'] = list(range(len(valid_df.index)))
         LOG.info('final csv keys: {0}'.format(valid_df.columns.values))
         return valid_df
     except Exception as e:
         LOG.critical('Failed getting dataFrame from csv: {0}'.format(e))
         return None
Code example #5
0
 def on_any_event(event):
     """Log any filesystem event and trigger processing for file changes.

     Directory events are only logged.  File creation/modification is
     handed to ``process_file``; deletion is logged as critical.
     """
     print("===================FileHandler===================")
     if event.is_directory:
         print('Folder change detected - {0}'.format(event.src_path))
         print("===================================================")
         return None
     print('File change detected - {0}'.format(event.src_path))
     print('Check path is file ===> {0}'.format(
         Path(event.src_path).is_file()))
     print("===================================================")
     if event.event_type in ('created', 'modified'):
         LOG.info('File created - {0}'.format(event.src_path))
         process_file(event.src_path)
     elif event.event_type == 'deleted':
         LOG.critical('File deleted - {0}'.format(event.src_path))
Code example #6
0
 def get_clean_xl_df(self, sheet):
     """Load one Excel *sheet* and return a cleaned DataFrame (or ``None``).

     Skips one header row for the ENTITIES/SCENARIOS sheets, normalizes
     column names, drops ``Unnamed`` columns, drops columns containing any
     NaN, then appends a sequential ``CS_INDEX`` column.
     """
     try:
         # these two sheets carry an extra title row above the header
         header_offset = 1 if sheet in ("ENTITIES", "SCENARIOS") else 0
         try:
             # NOTE(review): `encoding` was removed from read_excel in
             # newer pandas — confirm against the pinned pandas version.
             raw_df = pd.read_excel(self.file_path,
                                    sheet_name=sheet,
                                    skiprows=header_offset,
                                    encoding='utf-8')
         except Exception as e:
             LOG.critical('Failed getting dataFrame: {0}'.format(e))
             return None
         raw_df.columns = extractor.keys_replacement(raw_df)
         named_only = raw_df.loc[:, ~raw_df.columns.str.contains('^Unnamed')]  # noqa
         valid_df = named_only.dropna(axis=1, how='any').copy()
         valid_df['CS_INDEX'] = list(range(len(valid_df.index)))
         LOG.info('final keys: {0}'.format(valid_df.columns.values))
         return valid_df
     except Exception as e:
         LOG.critical('Failed cleaning dataFrame from excel: {0}'.format(e))
         return None
Code example #7
0
def process_file(file_path):
    """Dispatch *file_path* to the matching parser when it is a processable file.

    Non-files are ignored silently; unsupported extensions are logged and
    skipped; OS-level errors raised while parsing are logged as critical.
    """
    p = Path(file_path)
    # guard clause instead of wrapping the whole body in `if p.is_file():`
    if not p.is_file():
        return
    ext = p.suffix
    # IDIOM FIX: was `== False`; PEP 8 discourages equality comparison
    # against True/False — rely on the boolean directly.
    if not file_needs_processing(file_path, ext):
        LOG.info('file not compatible... not saving')
        return
    LOG.info(colored('--------------- Processing => ' + file_path, 'blue'))
    try:
        parse_and_save_file(ext, file_path)
    except (OSError, IOError) as e:
        LOG.critical(e)
Code example #8
0
def parse_and_save_file(ext, file_path):
    """Route *file_path* to the saver matching its extension.

    Excel (.xls/.xlsx), CSV (.csv) and mt940 text files (located under
    data_sources/mt940) are supported; other extensions fall through and
    are treated as success.  Failures are logged and re-raised as
    ``SaveFileError``.
    """
    error = 0
    try:
        if ext == ".csv":
            CsvSaver(file_path).handle_csv()
        elif ext in (".xls", ".xlsx"):
            XlSaver(file_path).handle_excel()
        elif ext == ".txt" and 'data_sources/mt940' in file_path:
            SwiftSaver(file_path).handle_mt940()
    except Exception as e:
        error = e
        LOG.critical('Failed to save File: {0}'.format(file_path))
        LOG.critical('ETM_SAVE_FILE_ERROR -> {0}'.format(e))
        raise SaveFileError()
    finally:
        if error == 0:
            LOG.info(
                colored(
                    '--------------- File successfully processed without uncatched exceptions :) => '
                    + file_path, 'yellow'))
Code example #9
0
 def handle_csv(self):
     """Parse ``self.file_path`` as CSV and save it to Mongo.

     Updates the collection row-by-row when it already exists, otherwise
     creates it (with a unique ``CS_INDEX`` index when the column is
     present) and bulk-inserts all rows.  Failures are logged; success is
     logged from ``finally``.
     """
     collection_name = None
     records = None
     error = 0
     try:
         df = self.get_clean_csv_df()
         if df is None:
             # get_clean_csv_df already logged the cause; bail out via except
             error = 1
             raise Exception
         collection_name = extractor.get_valid_string(
             Path(self.file_path).stem)
         # transpose so each row becomes one JSON document
         records = json.loads(df.T.to_json())
         mongo = db_manager.get_client_instance_with_db_name(self.db_name)
         # IDIOM FIX: was `== True`; rely on the boolean directly
         if db_manager.coll_exists(self.db_name, collection_name):
             LOG.info(
                 'attempting to update collection -> {0}'.format(collection_name))
             try:
                 self.update_docs(df, mongo, collection_name)
             except BulkWriteError as bwe:
                 print(bwe.details)
         else:
             LOG.info(
                 'attempting to create collection -> {0}'.format(collection_name))
             if 'CS_INDEX' in df.columns.values:
                 mongo[collection_name].create_index(
                     [('CS_INDEX', ASCENDING)], name='CS_INDEX', unique=True)
             mongo[collection_name].insert_many(list(records.values()))
     except Exception as e:
         error = e
         if collection_name is not None:
             LOG.critical(
                 'failed to save to collection: {0}'.format(collection_name))
         if records is not None:
             LOG.critical('-> data not saved !!!! => {0}'.format(
                 json.dumps(records, indent=4, sort_keys=True)))
         LOG.critical('ETM_CSV_ERROR -> {0}'.format(e))
     finally:
         if error == 0 and collection_name is not None:
             LOG.info(
                 colored('--------------- CSV successfully saved in ' + self.db_name + ' database to collection: ' + collection_name + ' --- \\o/ supa dupa dope !!! :) ', 'green'))
Code example #10
0
def not_found(error):
    """404 handler: log the error, then serve the static 404 page."""
    LOG.error(error)
    page = send_from_directory(PUBLIC_PATH, 'resources/404.html')
    return page
Code example #11
0
 def handle_mt940(self):
     """Parse ``self.file_path`` as a SWIFT mt940 statement and save to Mongo.

     The parsed transaction data is stored as a single document in a
     collection named after the file stem.  Failures are logged and
     re-raised as ``SwiftSaverFileError``; success is logged from
     ``finally``.
     """
     # BUGFIX: initialize before the try block so the except handler cannot
     # raise NameError when get_valid_string or mt940.parse itself fails
     # (previously these names could be unbound in the handler, masking the
     # original error).
     collection_name = None
     transactions = None
     try:
         error = 0
         collection_name = extractor.get_valid_string(
             Path(self.file_path).stem)
         transactions = mt940.parse(self.file_path)
         if transactions.data is not None and len(transactions.data) != 0:
             # round-trip through the mt940 JSON encoder to get plain types
             records = json.loads(
                 json.dumps(transactions.data, cls=mt940.JSONEncoder))
             mongo = db_manager.get_client_instance_with_db_name(
                 self.db_name)
             LOG.info('attempting to create collection -> {0}'.format(
                 collection_name))
             LOG.info('saving mt940 format -> {0}'.format(records))
             mongo[collection_name].insert_one(records)
         else:
             error = 1
             LOG.critical(
                 'mt-940 library failed to parse file need custom parsing RTFM => file not saved !!!!! :=> {0}'
                 .format(self.file_path))
     except Exception as e:
         error = e
         LOG.critical(
             'failed to save to collection: {0}'.format(collection_name))
         # transactions stays None when parsing itself raised
         if transactions is not None and extractor.is_valid_json(transactions.data):
             LOG.critical('-> data not saved !!!! => {0}'.format(
                 json.dumps(transactions.data,
                            indent=4,
                            sort_keys=True,
                            cls=mt940.JSONEncoder)))
         LOG.critical('ETM_MT940_ERROR -> {0}'.format(error))
         raise SwiftSaverFileError(e)
     finally:
         if error == 0:
             LOG.info(
                 colored(
                     '--------------- mt940 successfully saved in ' +
                     self.db_name + ' database to collection: ' +
                     collection_name + ' --- \\o/ supa dupa dope !!! :) ',
                     'green'))
Code example #12
0
 def handle_excel(self):
     """Save every sheet of the workbook at ``self.file_path`` to the DB."""
     workbook = pd.ExcelFile(self.file_path)
     sheet_names = workbook.sheet_names
     LOG.info('sheets_array => {0}'.format(sheet_names))
     for sheet in sheet_names:
         self.save_sheet_to_db(sheet)