class DatabaseServer:
    """Flask-based HTTP facade over a Postgres database.

    Exposes /api/select, /api/select_with_join (GET) and /api/insert,
    /api/update (POST) endpoints that forward JSON request bodies to the
    underlying ``Postgres`` helper.
    """

    def __init__(self, config_file='.env'):
        """Read configuration from the environment (optionally populated
        from a dotenv file next to this module) and register routes.

        :param config_file: dotenv filename resolved relative to this module.
        """
        self.app = Flask(__name__)
        try:
            dotenv_path = os.path.join(os.path.dirname(__file__), config_file)
            if os.path.exists(dotenv_path):
                load_dotenv(dotenv_path)
            self.host = os.environ.get('server_host')
            # werkzeug expects an int port; keep None when unset so Flask
            # falls back to its default.
            port = os.environ.get('server_port')
            self.port = int(port) if port else None
            self.db = Postgres(
                db_name=os.environ.get('POSTGRES_DB'),
                db_username=os.environ.get('POSTGRES_USER'),
                db_password=os.environ.get('POSTGRES_PASSWORD'),
                db_host=os.environ.get('db_host'),
                db_port=os.environ.get('db_port'),
                log_file=os.environ.get('log_file')
            )
        except Exception as e:
            # Best-effort startup: report and continue so the app object
            # still exists; endpoints will fail loudly if self.db is absent.
            print(colored(e, color='red'))
        self.__setup_routes()

    def run(self):
        """Start the Flask development server."""
        self.app.run(host=self.host, port=self.port)

    def __setup_routes(self):
        # Register the bound methods as Flask view functions.
        self.__select = self.app.route('/api/select', methods=['GET'])(self.__select)
        self.__select__with_join = self.app.route(
            '/api/select_with_join', methods=['GET'])(self.__select__with_join)
        self.__insert = self.app.route('/api/insert', methods=['POST'])(self.__insert)
        self.__update = self.app.route('/api/update', methods=['POST'])(self.__update)

    @staticmethod
    def __error_response(e):
        """Build a JSON error response for *e*.

        Fixes two bugs in the original handlers: the Exception object was
        passed to jsonify (not JSON serializable -> TypeError), and the
        response carried HTTP 200 despite ``"code": 400`` in the body.
        """
        return jsonify({"error": str(e), "code": 400}), 400

    def __select(self):
        """GET /api/select — body: {table, fields[, ordered_field, conditions]}."""
        data = request.json
        ordered_field = None
        sql_conditions = None
        try:
            table = data['table']
            fields = data['fields']
        except Exception as e:
            print(colored(e, color='red'))
            return self.__error_response(e)
        if 'ordered_field' in data:
            ordered_field = data['ordered_field']
        if 'conditions' in data:
            sql_conditions = self.__parse_conditions(data['conditions'])
        res = self.db.select(table, fields, ordered_field, sql_conditions)
        return jsonify(res)

    def __select__with_join(self):
        """GET /api/select_with_join — body:
        {join_tables, join_fields, fields[, ordered_field, conditions]}.
        """
        data = request.json
        ordered_field = None
        sql_conditions = None
        try:
            join_tables = data['join_tables']
            join_fields = data['join_fields']
            fields = data['fields']
        except Exception as e:
            print(colored(e, color='red'))
            return self.__error_response(e)
        if 'ordered_field' in data:
            ordered_field = data['ordered_field']
        if 'conditions' in data:
            sql_conditions = self.__parse_conditions(data['conditions'])
        try:
            res = self.db.select_with_join(
                join_tables, join_fields, fields, ordered_field, sql_conditions)
        except Exception as e:
            print(colored(e, color='red'))
            return self.__error_response(e)
        return jsonify(res)

    def __insert(self):
        """POST /api/insert — body: {table, values}."""
        data = request.json
        try:
            table = data['table']
            values = data['values']
        except Exception as e:
            print(colored(e, color='red'))
            return self.__error_response(e)
        res = self.db.insert(table, values)
        return jsonify(res)

    def __update(self):
        """POST /api/update — body: {table, values, conditions}."""
        data = request.json
        try:
            table = data['table']
            values = data['values']
            sql_conditions = self.__parse_conditions(data['conditions'])
        except Exception as e:
            print(colored(e, color='red'))
            return self.__error_response(e)
        self.db.update(table, values, sql_conditions)
        return jsonify({"code": 200})

    def __parse_conditions(self, conditions):
        """Translate {entity: {field: value | {'from','to'}}} into a list of
        psycopg2 ``sql.Composed`` WHERE fragments, or None when empty.

        SECURITY FIX: the original interpolated raw, user-supplied request
        values via ``sql.SQL(str(value))`` — a direct SQL-injection hole.
        Values now go through ``sql.Literal`` (properly quoted/escaped) and
        field names through ``sql.Identifier``.
        """
        if conditions is None:
            return None
        sql_conditions = []
        for entity in conditions:
            for field, value in conditions[entity].items():
                if isinstance(value, dict):
                    # Range condition: {'from': x, 'to': y} -> BETWEEN.
                    sql_condition = (sql.Identifier(field)
                                     + sql.SQL(' BETWEEN ')
                                     + sql.Literal(value['from'])
                                     + sql.SQL(' AND ')
                                     + sql.Literal(value['to']))
                else:
                    # Optionally table-qualified "table.field" equality.
                    field_sql = sql.SQL('.').join(
                        sql.Identifier(part) for part in field.split('.'))
                    sql_condition = field_sql + sql.SQL(' = ') + sql.Literal(value)
                sql_conditions.append(sql_condition)
        return sql_conditions or None
def main(args):
    """Command-line dispatcher: run exactly one pipeline step per flag.

    :param args: argparse namespace with boolean step flags and an
        optional ``sql_query`` string.
    """
    if args.get_dataset:
        print('Downloading the dataset..\n')
        source_dataset = Dataset(CONTRIBUTIONS_PATH)
        source_dataset.download_file()
        print('Getting the dataset row count..')
        ds_reader = DatasetReader(contributions_csv_file)
        print('\tDataset row count is: {} \n'.format(len(ds_reader)))
        # TODO: wire up logging.basicConfig(...) for this step.
    elif args.setup_database:
        db = Postgres()
        print('Dropping tables if exist..\n')
        print(db.drop_table())
        print('Creating the tables..\n')
        print(db.create_table())
        print('Importing raw data from csv...\n')
        print(db.copy_csv_query())
        print('Inserting data into tables...\n')
        print(db.insert())
        print('Creating indexes on tables...\n')
        print(db.create_index())
        print('Nullifying empty strings in donors...\n')
        print(db.update_rows())
        db.process_donors_table()
    elif args.get_partial_duplicate:
        print('Starting string_grouper...\n')
        dup_record = DuplicateRecord()
        sg_start_time = time()
        unique, duplicate, total, nans = dup_record.get_partial_duplicates()
        print("Unique: {}, Duplicate: {}, Total: {}, Nan: {}".format(
            unique, duplicate, total, nans))
        print("string_grouper duration:--- %s minutes ---"
              % ((time() - sg_start_time) / 60.0))
    elif args.get_exact_duplicate:
        print('Starting get_exact_duplicates...\n')
        sql_start_time = time()
        dup_record = DuplicateRecord()
        print(dup_record.get_exact_duplicates())
        # BUG FIX: the original called str.format() on a %-style template
        # ("...%s...".format(...)), so the elapsed time was never printed.
        print("sql Duration:--- %s seconds ---" % (time() - sql_start_time))
    elif args.sql_query:
        # Ad-hoc query passed on the command line.
        dr = DuplicateRecord()
        print(dr.get_cmdline_query(args.sql_query))
    elif args.get_query_statistics:
        db = Postgres()
        db.get_query_statistics()