Beispiel #1
0
class DatabaseServer:

    def __init__(self, config_file='.env'):
        self.app = Flask(__name__)

        try:
            dotenv_path = os.path.join(os.path.dirname(__file__), config_file)
            if os.path.exists(dotenv_path):
                load_dotenv(dotenv_path)

            self.host = os.environ.get('server_host')
            self.port = os.environ.get('server_port')

            self.db = Postgres(
                db_name=os.environ.get('POSTGRES_DB'),
                db_username=os.environ.get('POSTGRES_USER'),
                db_password=os.environ.get('POSTGRES_PASSWORD'),
                db_host=os.environ.get('db_host'),
                db_port=os.environ.get('db_port'),
                log_file=os.environ.get('log_file')
            )
        except Exception as e:
            print(e)

        self.__setup_routes()

    def run(self):
        self.app.run(host=self.host, port=self.port)

    def __setup_routes(self):
        self.__select = self.app.route('/api/select', methods=['GET'])(self.__select)
        self.__select__with_join = self.app.route('/api/select_with_join', methods=['GET'])(self.__select__with_join)
        self.__insert = self.app.route('/api/insert', methods=['POST'])(self.__insert)
        self.__update = self.app.route('/api/update', methods=['POST'])(self.__update)

    def __select(self):
        data = request.json
        ordered_field = None
        sql_conditions = None

        try:
            table = data['table']
            fields = data['fields']
        except Exception as e:
            print(colored(e, color='red'))
            return jsonify({
                "error": e,
                "code": 400
            })

        if 'ordered_field' in data:
            ordered_field = data['ordered_field']
        if 'conditions' in data:
            sql_conditions = self.__parse_conditions(data['conditions'])

        res = self.db.select(table, fields, ordered_field, sql_conditions)
        return jsonify(res)

    def __select__with_join(self):
        data = request.json
        ordered_field = None
        sql_conditions = None

        try:
            join_tables = data['join_tables']
            join_fields = data['join_fields']
            fields = data['fields']
        except Exception as e:
            print(colored(e, color='red'))
            return jsonify({
                "error": e,
                "code": 400
            })

        if 'ordered_field' in data:
            ordered_field = data['ordered_field']
        if 'conditions' in data:
            sql_conditions = self.__parse_conditions(data['conditions'])

        try:
            res = self.db.select_with_join(join_tables, join_fields, fields, ordered_field, sql_conditions)
        except Exception as e:
            print(colored(e, color='red'))
            return jsonify({
                "error": e,
                "code": 400
            })

        return jsonify(res)

    def __insert(self):
        data = request.json

        try:
            table = data['table']
            values = data['values']
        except Exception as e:
            print(colored(e, color='red'))
            return jsonify({
                "error": e,
                "code": 400
            })

        res = self.db.insert(table, values)
        return jsonify(res)

    def __update(self):
        data = request.json

        try:
            table = data['table']
            values = data['values']
            sql_conditions = self.__parse_conditions(data['conditions'])
        except Exception as e:
            print(colored(e, color='red'))
            return jsonify({
                "error": e,
                "code": 400
            })

        self.db.update(table, values, sql_conditions)
        return jsonify({"code": 200})

    def __parse_conditions(self, conditions):
        if conditions is None:
            return None
        sql_conditions = []

        for entity in conditions:
            for field in conditions[entity]:
                if type(conditions[entity][field]) == dict:
                    from_value = str(conditions[entity][field]['from'])
                    to_value = str(conditions[entity][field]['to'])
                    sql_condition = sql.Identifier(field) + sql.SQL(' BETWEEN ') + sql.SQL(from_value) + sql.SQL(' AND ') + sql.SQL(to_value)
                else:
                    val = str(conditions[entity][field])

                    table_field = field.split('.')
                    if len(table_field) == 2:
                        field = sql.SQL(table_field[0] + '.') + sql.Identifier(table_field[1])
                    else:
                        field = sql.Identifier(field)

                    sql_condition = field + sql.SQL(' = ') + sql.SQL(val)
                sql_conditions.append(sql_condition)

        if not sql_conditions:
            sql_conditions = None

        return sql_conditions
def main(args):
    # print(args)

    if args.get_dataset:
        print('Downloading the dataset..\n')
        source_dataset = Dataset(CONTRIBUTIONS_PATH)
        source_dataset.download_file()

        print('Getting the dataset row count..')
        ds_reader = DatasetReader(contributions_csv_file)
        print('\tDataset row count is: {} \n'.format(len(ds_reader)))

    # start_time = time()
    # print('Starting (TO NOTE: Part_1 takes ~10.5 minutes to complete.)...\n')
    ###########################################################################
    # TODO logging.basicConfig(format=LOGS_FORMAT,
    #                     datefmt='%d-%m-%Y %H:%M:%S',
    #                     filename="tmp/entity_resolution.log",
    #                     filemode='w',
    #                     level=logging.DEBUG)
    #
    # logging.info('INFO test log message')
    # logging.debug('debug test log message')
    ###########################################################################

    elif args.setup_database:
        db = Postgres()
        print('Dropping tables if exist..\n')
        print(db.drop_table())

        print('Creating the tables..\n')
        # print('creating recipients table...')
        # print('creating contributions table...')
        print(db.create_table())

        print('Importing raw data from csv...\n')
        print(db.copy_csv_query())

        print('Inserting data into tables...\n')
        print(db.insert())

        print('Creating indexes on tables...\n')
        # print('creating indexes on donors table...')
        # print('creating indexes on contributions...')
        print(db.create_index())

        print('Nullifying empty strings in donors...\n')
        print(db.update_rows())

        # print('creating processed_donors...')
        db.process_donors_table()
        # print("Database Init Duration: --- %s seconds ---" % (time() - start_time))

    elif args.get_partial_duplicate:
        print('Starting string_grouper...\n')
        dup_record = DuplicateRecord()
        sg_start_time = time()
        unique, duplicate, total, nans = dup_record.get_partial_duplicates()
        print("Unique: {}, Duplicate: {}, Total: {}, Nan: {}".format(
            unique, duplicate, total, nans))
        print("string_grouper duration:--- %s minutes ---" %
              ((time() - sg_start_time) / 60.0))

    elif args.get_exact_duplicate:
        print('Starting get_exact_duplicates...\n')
        sql_start_time = time()
        dup_record = DuplicateRecord()
        print(dup_record.get_exact_duplicates())
        print("sql Duration:--- %s seconds ---".format(
            (time() - sql_start_time)))

    sql_query = args.sql_query
    if sql_query:
        dr = DuplicateRecord()
        print(dr.get_cmdline_query(sql_query))

    elif args.get_query_statistics:
        db = Postgres()
        db.get_query_statistics()