Exemple #1
0
    def prepare(self):
        """Resolve unknown file encodings, then seed the aux collection.

        For every data file whose ``get_file_encoding`` result is the empty
        string, ``read_file`` is called (presumably this detects and records
        the encoding -- confirm against its definition). If that happened for
        at least one file, the ``self.fs`` connection is cycled.

        Afterwards one aux entry per data file is inserted, each starting at
        ``file_seek`` 0 with an empty header and ``tr_id`` 0.

        Returns:
            Whatever the ``db_session``-wrapped helper yields: ``None`` when
            there are no data files, ``False`` once the entries are inserted.
        """
        needs_reconnect = False
        for path, _ in self.fs.data_files:
            if get_file_encoding(path) != "":
                continue
            print_flush(f"Determining encoding of file '{path}': ", end="")
            read_file(path)
            print_flush("done!")
            needs_reconnect = True

        if needs_reconnect:
            # Cycle the connection after read_file ran for at least one file.
            self.fs.disconnect()
            self.fs.connect()

        @db_session
        def _prepare(session, db):
            aux_collection = self.get_aux_collection(session, db)
            if len(self.fs.data_files) == 0:
                return
            operation = DbOperation(session, db)
            initial_entries = [{
                "file_name": file_name,
                "year": file_year,
                "file_seek": 0,
                "header": "",
                "tr_id": 0
            } for file_name, file_year in self.fs.data_files]
            operation.insert_data(aux_collection,
                                  initial_entries,
                                  "FILL AUX COLLECTION",
                                  use_session=False)
            return False

        return _prepare(self)
Exemple #2
0
                def _insert_rows(session, db):
                    """Store the current batch, then advance or retire the checkpoint.

                    Uses names from the enclosing scope: ``rows``, ``end``,
                    ``entry``, ``file``, ``file_name``, ``year`` and
                    ``header_text``.
                    """
                    aux = self.get_aux_collection(session, db)
                    target = self.get_target_collection(session, db)
                    DbOperation(session, db).insert_data(target, rows,
                                                         "INSERT ROWS")

                    if not end:
                        # More data remains: record the new read position under
                        # the next tr_id as a fresh aux entry.
                        entry["file_seek"] = file.tell()
                        entry["tr_id"] = entry["tr_id"] + 1
                        operation = DbOperation(session, db)
                        checkpoint = {
                            "file_name": file_name,
                            "year": year,
                            "file_seek": file.tell(),
                            "header": header_text,
                            "tr_id": entry["tr_id"]
                        }
                        operation.insert_data(aux, [checkpoint],
                                              "UPDATE FILE SEEK")
                        return

                    # File exhausted: overwrite the progress line with "done!"
                    # and remove this file's aux entry.
                    print_flush(
                        f"\r\x1b[1K\rPopulating from file '{file_name}' ({year}): {' ' * 35}",
                        end="")
                    print_flush(
                        f"\r\x1b[1K\rPopulating from file '{file_name}' ({year}): done!"
                    )
                    DbOperation(session, db).delete_data(
                        aux, entry, "DROP AUX COLLECTION")
Exemple #3
0
def main():
    """Entry point: print the banner, load env files and run the menu loop.

    A ``Populate`` instance is used as a context manager; ``handle_state`` is
    called repeatedly until it returns a falsy value, with an empty line
    printed after every truthy result.
    """
    print_flush("\n\n\n")
    print_flush("Populate script started\n")
    use_env_files()

    populate = Populate()
    with populate:
        while True:
            if not handle_state(populate):
                break
            print_flush()

    print_flush("\nPopulate script stopped")
Exemple #4
0
    def do_query(self):
        """Run the test aggregation and write it to ``query_result.csv``.

        The aggregation keeps documents of year 2019 or 2020 whose
        ``PHYSTESTSTATUS`` equals "Зараховано", groups them by
        ``(year, REGNAME)`` and takes ``$max`` of ``PHYSBALL100``. Each maximum
        is then parsed as a float after replacing the decimal comma with a dot.

        NOTE(review): the ``.replace(",", ".")`` implies ``PHYSBALL100`` is
        stored as a string, so ``$max`` would compare strings rather than
        numbers -- confirm this is acceptable for the value range in the data.

        The result file is written into ``self.fs.query_folder`` with one line
        per region. A year with no data for a region is rendered as the text
        ``None`` (the result of ``dict.get``).
        """
        print_flush("Executing query...", end="")

        @db_session
        def _do_query(session, db):
            target_collection = self.get_target_collection(session, db)
            result = target_collection.aggregate([{
                "$match": {
                    "$or": [{
                        "year": 2019
                    }, {
                        "year": 2020
                    }],
                    "PHYSTESTSTATUS": "Зараховано"
                }
            }, {
                "$group": {
                    "_id": {
                        "year": "$year",
                        "REGNAME": "$REGNAME",
                    },
                    "max_ball": {
                        "$max": "$PHYSBALL100"
                    }
                }
            }])
            # region name -> {year -> max score}
            stats = dict()
            for r in result:
                group_key = r["_id"]
                stats.setdefault(group_key["REGNAME"],
                                 dict())[group_key["year"]] = float(
                                     r["max_ball"].replace(",", "."))
            return stats

        stats = _do_query(self)
        # Explicit UTF-8: the data is non-ASCII (the match literal above is
        # Cyrillic), and the platform default encoding may be unable to
        # represent it or may differ between machines.
        with open(os.path.join(self.fs.query_folder, "query_result.csv"),
                  "w",
                  encoding="utf-8") as f:
            f.write("Region,MaxPhysBall100_2019,MaxPhysBall100_2020\n")
            for region, per_year in stats.items():
                f.write(
                    f"{region},{per_year.get(2019)},{per_year.get(2020)}\n")
        print_flush(" done!")
Exemple #5
0
def handle_state(populate):
    """Show the menu for the current population state and run the choice.

    The state is read once from ``populate.get_state()``. Before any
    state-dependent action runs (and again after a confirmation prompt) the
    state is reloaded and compared with the one the menu was built for.

    Returns:
        ``False`` when the user picks "exit" or the state is not one of
        ``"clear"``, ``"finished"`` or ``"interrupted"``; ``True`` when the
        state turned out to have changed after a reload; otherwise the return
        value of the invoked action (``reload``, ``start``, ``resume``,
        ``assume_finished``, ``drop_finished`` or ``drop_interrupted``).
    """
    state = populate.get_state()

    def state_changed():
        # Refresh and report whether the state no longer matches the menu.
        reload(populate)
        return populate.get_state() != state

    if state == "clear":
        while True:
            choice = ask_variants("Db is clear.\n", {
                "r": "reload state",
                "s": "start population",
                "e": "exit",
            })
            if choice == "e":
                return False
            if choice == "r":
                return reload(populate)
            if choice == "s":
                return True if state_changed() else start(populate)
            # Unrecognised answer: blank line, then ask again.
            print_flush()

    elif state == "finished":
        while True:
            choice = ask_variants(
                "Looks like db is populated.\n", {
                    "r": "reload state",
                    "q": "execute test query",
                    "d": "drop db",
                    "e": "exit",
                })
            if choice == "e":
                return False
            if choice == "r":
                return reload(populate)
            if choice == "q":
                if state_changed():
                    return True
                # The query does not leave the menu; fall through to re-ask.
                populate.do_query()
            elif choice == "d":
                if state_changed():
                    return True
                if ask_confirm():
                    # Re-check after the prompt before dropping.
                    return True if state_changed() else drop_finished(
                        populate)
            print_flush()

    elif state == "interrupted":
        while True:
            choice = ask_variants(
                "Looks like the population was interrupted.\n", {
                    "r": "reload state",
                    "c": "continue population",
                    "f": "assume population was finished",
                    "d": "drop db",
                    "e": "exit"
                })
            if choice == "e":
                return False
            if choice == "r":
                if state_changed():
                    return True
                return reload(populate)
            if choice == "c":
                return True if state_changed() else resume(populate)
            if choice == "f":
                if state_changed():
                    return True
                if ask_confirm():
                    return True if state_changed() else assume_finished(
                        populate)
            elif choice == "d":
                if state_changed():
                    return True
                if ask_confirm():
                    return True if state_changed() else drop_interrupted(
                        populate)
            print_flush()

    return False
Exemple #6
0
def clear_artifacts(populate):
    """Announce the cleanup and call ``populate.drop_aux()``.

    Returns:
        Always ``True``.
    """
    print_flush("Clearing artifacts...")
    populate.drop_aux()
    return True
Exemple #7
0
def drop_finished(populate):
    """Announce the drop and call ``populate.drop_target()``.

    Returns:
        Always ``True``.
    """
    print_flush("Dropping...")
    populate.drop_target()
    return True
Exemple #8
0
    def start(self):
        """Populate the target collection from the pending data files.

        The aux collection holds one checkpoint per unfinished file
        (``file_name``, ``year``, ``file_seek``, ``header``, ``tr_id``). This
        method takes the checkpoint with the highest ``tr_id``, opens its file
        at ``file_seek`` and inserts rows in batches of 1000. After each batch
        a new checkpoint with an incremented ``tr_id`` is written and the
        file's older checkpoints are deleted. When the file is exhausted its
        checkpoints are removed and the method calls itself for the next file.

        A ``{"dummy": 0}`` document is inserted into the target collection if
        absent before work begins, and deleted again once no checkpoints
        remain (presumably it marks a population in progress -- confirm).

        Returns:
            ``True`` once no aux entries remain (after dropping the aux
            collection); otherwise the result of the recursive call.
        """
        @db_session
        def _get_latest_entry(session, db):
            target_collection = self.get_target_collection(session, db)
            dummy = target_collection.find_one({"dummy": 0})
            if dummy is None:
                DbOperation(session, db).insert_data(target_collection,
                                                     [{
                                                         "dummy": 0
                                                     }],
                                                     "INSERT TARGET DUMMY",
                                                     use_session=False)
            aux_collection = self.get_aux_collection(session, db)
            # find_one(sort=...) returns the highest-tr_id entry or None.
            # It replaces find().sort() + Cursor.count(), because
            # Cursor.count() is deprecated and removed in PyMongo 4.
            return aux_collection.find_one(
                sort=[("tr_id", pymongo.DESCENDING)])

        entry = _get_latest_entry(self)
        if entry is None:

            @db_session
            def _delete_dummy(session, db):
                target_collection = self.get_target_collection(session, db)
                dummy = target_collection.find_one({"dummy": 0})
                if dummy is not None:
                    # NOTE(review): the label below matches the insert label;
                    # looks copy-pasted, left unchanged -- confirm.
                    DbOperation(session,
                                db).delete_data(target_collection, dummy,
                                                "INSERT TARGET DUMMY")

            _delete_dummy(self)
            self.drop_aux()
            return True

        file_name, year, file_seek, header_text = \
            entry["file_name"], entry["year"], entry["file_seek"], entry["header"]
        file_size = get_file_size(file_name)
        print_flush(f"Populating from file '{file_name}' ({year}): ", end='')
        with open(file_name, "r",
                  encoding=get_file_encoding(file_name)) as file:
            if file_seek == 0:
                # Fresh file: the first line is the header. Persist it with the
                # position after it so a resumed run can skip re-reading it.
                entry["header"] = header_text = file.readline().strip()
                entry["file_seek"] = file.tell()

                @db_session
                def _set_header_text(session, db):
                    aux_collection = self.get_aux_collection(session, db)
                    DbOperation(session,
                                db).update_data(aux_collection, entry,
                                                "SAVE AUX HEADER")

                _set_header_text(self)
            else:
                file.seek(file_seek)
            header = [h.upper() for h in strip_arr(header_text.split(';'))]
            batch_size = 1000
            while True:
                print_flush(
                    f"\rPopulating from file '{file_name}' ({year}): "
                    f"{format_file_size(entry['file_seek'])} / {format_file_size(file_size)} "
                    f"({entry['file_seek'] / file_size:.2%})",
                    end="")
                end = False
                rows = []
                for _ in range(batch_size):
                    line = []
                    prev_line_text = ""
                    while True:
                        raw_line = file.readline()
                        if not raw_line:
                            # Real end of file. Checking the raw line (not the
                            # stripped one) prevents an endless loop when EOF
                            # arrives while a partial record is pending; that
                            # incomplete record is discarded.
                            end = True
                            break
                        line_text = prev_line_text + raw_line.strip()
                        if not line_text:
                            # Blank line, not EOF: skip it.
                            continue
                        line = [
                            strip(l) for l in line_text.rstrip().split(';')
                        ]
                        if len(line) == len(header):
                            break
                        # Field count does not match the header: treat as a
                        # record continued on the next physical line.
                        prev_line_text = line_text
                    if end:
                        break
                    row = dict(zip(header, line))
                    row["year"] = year
                    rows.append(row)

                @db_session
                def _insert_rows(session, db):
                    aux_collection = self.get_aux_collection(session, db)
                    target_collection = self.get_target_collection(session, db)
                    DbOperation(session,
                                db).insert_data(target_collection, rows,
                                                "INSERT ROWS")

                    if end:
                        print_flush(
                            f"\r\x1b[1K\rPopulating from file '{file_name}' ({year}): {' ' * 35}",
                            end="")
                        print_flush(
                            f"\r\x1b[1K\rPopulating from file '{file_name}' ({year}): done!"
                        )
                        DbOperation(session,
                                    db).delete_data(aux_collection, entry,
                                                    "DROP AUX COLLECTION")
                    else:
                        # Write the advanced position as a new checkpoint under
                        # the next tr_id; older ones are removed afterwards.
                        entry["file_seek"] = file.tell()
                        entry["tr_id"] = entry["tr_id"] + 1
                        DbOperation(session, db).insert_data(
                            aux_collection, [{
                                "file_name": file_name,
                                "year": year,
                                "file_seek": entry["file_seek"],
                                "header": header_text,
                                "tr_id": entry["tr_id"]
                            }], "UPDATE FILE SEEK")

                _insert_rows(self)

                @db_session
                def _remove_old_aux(session, db):
                    aux_collection = self.get_aux_collection(session, db)
                    # Finished file: delete all of its checkpoints. Otherwise
                    # keep only the one carrying the current tr_id.
                    for old_entry in aux_collection.find(
                        ({
                            "file_name": file_name
                        } if end else {
                            "file_name": file_name,
                            "tr_id": {
                                "$ne": entry["tr_id"]
                            }
                        })):
                        DbOperation(session,
                                    db).delete_data(aux_collection, old_entry)

                _remove_old_aux(self)
                if end:
                    # Move on to the next pending file (or finish up).
                    return self.start()