예제 #1
0
def _import_and_commit(dolt: Dolt, table: str, data: pd.DataFrame,
                       primary_keys: Optional[List[str]], import_mode: str):
    """Import ``data`` into ``table``, commit the change and describe the commit.

    The data is wrapped in a new ``pd.DataFrame`` and handed to
    ``dolt_write.import_df``; the table is then staged and committed with a
    message naming the table and the import mode.

    Returns:
        A dict with the ``commit_hash``, ``timestamp``, ``author`` and
        ``message`` taken from the first entry of ``dolt.log()``.
    """
    frame = pd.DataFrame(data)
    dolt_write.import_df(dolt, table, frame, primary_keys, import_mode)

    # Stage the table and record the import as its own commit.
    dolt.add(table)
    commit_message = 'Executed import on table {} in import mode "{}"'.format(
        table, import_mode)
    dolt.commit(commit_message)

    # presumably the first log entry is the commit just created -- confirm
    # against the log ordering of the Dolt client in use.
    latest = dolt.log()[0]
    summary = dict(
        commit_hash=latest.hash,
        timestamp=latest.ts,
        author=latest.author,
        message=latest.message,
    )
    return summary
예제 #2
0
def main():
    """Run an interactive console menu for exercising Dolt branch/revision operations.

    Opens the Dolt repository in the current directory and loops on a
    numbered menu until the user enters ``q``.  Most options print the
    wall-clock time taken by the underlying repository operation.

    The commit list used by options 7 and 8 is cached: it is (re)loaded by
    options 2 and 3, and loaded on demand the first time option 7 or 8 needs
    it, so those options no longer fail when chosen before option 2 or 3.
    """
    repo = Dolt(".")

    act_branch, _ = repo.branch()

    print()

    answer = ''
    printFlag = False
    # Cached commit list; None until the log has been pulled at least once.
    commits = None

    def show_active_branch():
        # Prints the most recently fetched active branch (refreshed by
        # options 0 and 3 only).
        print()
        print("Active Branch:")
        print(act_branch)
        print()

    def commit_text(entry):
        # Part of str(entry) after the first ":" and then after the first
        # " @ ".  presumably str(entry) looks like "<hash>: <author> @ ..."
        # -- confirm against the commit type returned by repo.log().
        return str(entry).split(":", 1)[1].split(" @ ", 1)[1]

    while answer != 'q':
        print("0 - List Branches")
        print("1 - List Active Branch")
        print("2 - Get Branch Commits/Revisions")
        print("3 - Change Branch")
        print("4 - Create Branch")
        print("5 - Merge Branch")
        print("6 - Delete Branch")
        print("7 - List Revision Range")
        print("8 - Checkout a Revision")
        print("9 - Enable List Commits Printing")
        print("10- Add Revisions")
        print("11- Batch Revisions")
        print("q - Quit")
        answer = input("Select item : ")

        # List Branches
        if answer == '0':
            act_branch, branches = repo.branch()
            for branch in branches:
                # The branch name is extracted from the branch's string form:
                # the text between the first ":" and the following ",".
                branchData = str(branch).split(":", 2)
                branchName = str(branchData[1]).split(",", 1)
                print (branchName[0])

        # List Active Branch
        elif answer == '1':
            show_active_branch()

        # Get Branch Commits/Revisions
        elif answer == '2':
            start_time = datetime.datetime.now()
            commits = list(repo.log().values())
            end_time = datetime.datetime.now()
            print("Total Revisions = ", len(commits))
            print("Total Commit Revision Pull Time = ", end_time-start_time)
            if printFlag:
                for com_id, entry in enumerate(commits):
                    print("commit_id: ", com_id, commit_text(entry))

        # Change Branch
        elif answer == '3':
            myBranch = input("Branch Name : ")
            switch_branch(repo, myBranch)
            act_branch, _ = repo.branch()
            # Refresh the cached commits so options 7/8 see the new branch.
            start_time = datetime.datetime.now()
            commits = list(repo.log().values())
            end_time = datetime.datetime.now()
            print("Total Commit Revision Pull Time = ", end_time-start_time)

        # Create Branch
        elif answer == '4':
            show_active_branch()
            newBranch = input("Enter New Branch Name : ")
            start_time = datetime.datetime.now()
            create_branch(repo, newBranch)
            end_time = datetime.datetime.now()
            print("Total Create Branch Time = ", end_time-start_time)

        # Merge Branch
        elif answer == '5':
            show_active_branch()
            mergeBranch = input("Enter Merge Branch Name : ")
            start_time = datetime.datetime.now()
            merge_branch(repo, mergeBranch, "Merging Branch")
            end_time = datetime.datetime.now()
            print("Total Merge Branch Time = ", end_time-start_time)

        # Delete Branch
        elif answer == '6':
            show_active_branch()
            deleteBranch = input("Enter Branch Name To Delete : ")
            start_time = datetime.datetime.now()
            delete_branch(repo, deleteBranch)
            end_time = datetime.datetime.now()
            # Label fixed: this times a delete, not a create.
            print("Total Delete Branch Time = ", end_time-start_time)

        # List Revision Range
        elif answer == '7':
            show_active_branch()
            revision_id = int(input("Enter Start Revision ID : "))
            revision_max = int(input("Enter Revision Range : "))
            if commits is None:
                commits = list(repo.log().values())
            # Clamp to the cached list so an oversized range does not raise
            # IndexError and a negative start does not wrap around.
            first = max(revision_id, 0)
            last = min(revision_id + revision_max, len(commits))
            for com_id in range(first, last):
                print("commit_id: ", com_id, commit_text(commits[com_id]))

        # Checkout a Revision
        elif answer == '8':
            show_active_branch()
            revision_id = int(input("Enter Revision ID : "))
            if commits is None:
                commits = list(repo.log().values())
            if 0 <= revision_id < len(commits):
                # Text before the first ":" of the commit's string form is
                # used as the starting point of the new branch.
                start_point = str(commits[revision_id]).split(":", 1)[0]
                startBranch = "revision"+str(revision_id)
                start_time = datetime.datetime.now()
                starting_point_branch(repo, startBranch, start_point)
                end_time = datetime.datetime.now()
                # Label fixed: this times a checkout, not a merge.
                print("Total Checkout Revision Time = ", end_time-start_time)
            else:
                print("Revision ID out of range: ", revision_id)

        # Enable List Commits Printing
        elif answer == '9':
            printFlag = True

        # Add Revisions
        elif answer == '10':
            show_active_branch()
            person = str(input("Enter First Name : "))
            revision_id = int(input("Enter Start Revision ID : "))
            revision_max = int(input("Enter End Revision ID : "))
            start_time = datetime.datetime.now()
            # One change plus one commit per revision id, end id inclusive.
            for revId in range(revision_id,revision_max+1):
                change_person_revid(repo, person, revId)
                comment=f"{person}      RevID="+str(revId).zfill(2)
                commit(repo, comment)

            end_time = datetime.datetime.now()
            print("Total Revision Addition Time = ", end_time-start_time)
            print(f"Revisions {revision_id} -> {revision_max}")

        # Batch Revisions
        elif answer == '11':
            show_active_branch()
            person = str(input("Enter First Name : "))
            revision_id = int(input("Enter Start Revision ID : "))
            revision_max = int(input("Enter Revision Range : "))
            step = 50
            start_time = datetime.datetime.now()
            batch_revisions(repo, revision_id, revision_max, step, person)

            end_time = datetime.datetime.now()
            print("Total Revision Addition Time = ", end_time-start_time)
예제 #3
0
class DoltDT(object):
    """Read and write Dolt tables from a Metaflow flow, recording what was touched.

    Every table read or written through an instance is tracked in
    ``table_reads`` / ``table_writes``.  Those records are written to a
    ``metadata`` table in a second Dolt database located in the current
    working directory.  Used as a context manager, leaving the ``with`` block
    commits pending data changes and then the metadata.
    """

    def __init__(self, run=None, database: str = ".", branch: str = 'master'):
        """
        Initialize a new context for Dolt operations with Metaflow.

        run: this is either
            - a FlowSpec when initialized with a running Flow
            - a Flow when looking across for data read/written across runs of a Flow
            - a Run when looking for data read/written by a specific run
        database: this is a path to a location on the filesystem with a Dolt database
        branch: branch of ``database`` to operate on; it is checked out when it
            is not already the current branch
        """
        self.run = run
        self.database = database
        self.branch = branch
        self.meta_database = "."

        self.doltdb = Dolt(self.database)
        try:
            self.meta_doltdb = Dolt(os.getcwd())
        except Exception:
            # Opening failed, so create a Dolt database in the working
            # directory.  Catching Exception instead of a bare except keeps
            # KeyboardInterrupt/SystemExit from being swallowed here.
            self.meta_doltdb = Dolt.init(os.getcwd())

        current_branch, _ = self.doltdb.branch()
        # Name of the branch that was active before switching to `branch`;
        # stays None when no switch was needed.  Nothing in this class checks
        # the original branch out again.
        self.entry_branch = None
        if current_branch.name != branch:
            # Fixed: this used to bind a throwaway local, so the attribute
            # was always None.
            self.entry_branch = current_branch.name
            self.doltdb.checkout(branch, checkout_branch=False)

        self.table_reads = []
        self.table_writes = []

    def __enter__(self):
        """Enter the context; requires a running flow and a clean working set."""
        assert isinstance(
            self.run, FlowSpec
        ) and current.is_running_flow, 'Context manager use requires running flow'
        assert self.doltdb.status(
        ).is_clean, 'DoltDT as context manager requires clean working set for transaction semantics'
        return self

    def __exit__(self, *args, allow_empty: bool = True):
        """Commit pending data changes, then the metadata, when leaving the context.

        The exception details in ``args`` are not inspected, so the commits
        happen whether or not the block raised; nothing is returned, so an
        exception from the block still propagates.  ``allow_empty`` is
        forwarded to both commits.
        """
        if not self.doltdb.status().is_clean:
            self.commit_writes(allow_empty=allow_empty)
        if self.table_reads or self.table_writes:
            self.commit_metadata(allow_empty=allow_empty)

    def _get_table_read(self, table: str) -> DoltRead:
        """Build a ``DoltRead`` record for ``table`` in the current task."""
        return self._get_dolt_action('read', DoltRead, table)

    def _get_table_write(self, table: str) -> DoltWrite:
        """Build a ``DoltWrite`` record for ``table`` in the current task."""
        return self._get_dolt_action('write', DoltWrite, table)

    def _get_dolt_action(self, action_str: str, action: type, table: str):
        """Instantiate ``action`` with the current flow/run/step/task ids,
        the latest commit hash of the data database, the table name, the
        database path and ``action_str`` as its kind."""
        return action(
            flow_name=current.flow_name,
            run_id=current.run_id,
            step_name=current.step_name,
            task_id=current.task_id,
            commit=self._get_latest_commit_hash(),
            table_name=table,
            database=self.database,
            kind=action_str,
        )

    def _get_latest_commit_hash(self) -> str:
        """Return the first key of ``self.doltdb.log()``.

        presumably the log is an ordered mapping keyed by commit hash with
        the newest commit first -- confirm against the Dolt client in use.
        """
        lg = self.doltdb.log()
        return lg.popitem(last=False)[0]

    def write_metadata(self, data: List[DoltMeta]):
        """Important that write metadata commit is recorded immediately after the data commit

        Imports one row per record in ``data`` into the ``metadata`` table of
        the metadata database, using every column as primary key.  When
        ``data`` is None, the tracked reads and writes are used instead.
        """
        # Fixed: `data` used to be ignored in favour of the tracked lists.
        records = data if data is not None else self.table_reads + self.table_writes
        meta_df = pd.DataFrame.from_records([x.dict() for x in records])
        import_df(repo=self.meta_doltdb,
                  table_name="metadata",
                  data=meta_df,
                  primary_keys=meta_df.columns.tolist())

    def write_table(self, table_name: str, df: pd.DataFrame, pks: List[str]):
        """
        Writes the contents of the given DataFrame to the specified table. If the table exists it is updated, if it
        does not it is created.

        The write is recorded in ``table_writes``; nothing is committed here.
        """
        assert current.is_running_flow, 'Writes and commits are only supported in a running Flow'
        import_df(repo=self.doltdb,
                  table_name=table_name,
                  data=df,
                  primary_keys=pks)
        self.table_writes.append(self._get_table_write(table_name))

    def read_table(self,
                   table_name: str,
                   commit: str = None,
                   flow_name: str = None,
                   run_id: str = None) -> pd.DataFrame:
        """
        Returns the specified table as a DataFrame and records the read.

        - with ``commit``: the table is read from the data database as of
          that commit
        - with ``flow_name`` and ``run_id``: the database path and commit are
          looked up in the metadata database for that run, and the table is
          read from that database as of that commit
        - otherwise: the table is read from the data database as it
          currently is

        Raises ValueError when not called from a running flow.
        """
        if not current.is_running_flow:
            raise ValueError("read_table is only supported in a running Flow")

        read_meta = self._get_table_read(table_name)

        if commit:
            table = self._get_dolt_table_asof(self.doltdb, table_name, commit)
            read_meta.commit = commit
        elif flow_name and run_id:
            df = read_table_sql(self.meta_doltdb,
                                _get_actions_query(flow_name, run_id, 'read'))
            # Only the first row of the query result is used.
            database = df.database.values[0]
            commit = df.commit.values[0]
            # checkout database and get table ASOF commit
            db = Dolt(database)
            table = self._get_dolt_table_asof(db, table_name, commit)
            read_meta.commit = commit
        else:
            # Inside a method, `read_table` resolves to the module-level
            # function of that name, not to this method.
            table = read_table(self.doltdb, table_name)
            read_meta.commit = self._get_latest_commit_hash()
        self.table_reads.append(read_meta)
        return table

    def commit_writes(self, allow_empty=True):
        """
        Stages the tracked tables in the data database and creates a commit
        whose message identifies the current flow, run, step and task.

        Raises ValueError when not called from a running flow.
        """
        if not current.is_running_flow:
            raise ValueError(
                'Writes and commits are only supported in a running Flow')

        # NOTE(review): tables that were only read are staged as well --
        # confirm this is intended.
        to_commit = [
            table_write.table_name
            for table_write in self.table_writes + self.table_reads
        ]
        self.doltdb.add(to_commit)
        self.doltdb.commit(message=self._get_commit_message(),
                           allow_empty=allow_empty)

    def commit_metadata(self, allow_empty=True):
        """Stamp the tracked writes with the latest data commit hash, write
        all tracked reads and writes to the ``metadata`` table, and commit
        that table in the metadata database.

        Returns whatever the metadata database's ``commit`` call returns.
        """
        commit_hash = self._get_latest_commit_hash()  # might be different db
        for w in self.table_writes:
            w.set_commit(commit_hash)

        self.write_metadata(self.table_reads + self.table_writes)
        self.meta_doltdb.add("metadata")
        return self.meta_doltdb.commit(message=self._get_commit_message(),
                                       allow_empty=allow_empty)

    @classmethod
    def _get_commit_message(cls):
        """Return ``flow_name/run_id/step_name/task_id`` for the current task."""
        return f'{current.flow_name}/{current.run_id}/{current.step_name}/{current.task_id}'

    @classmethod
    def _get_dolt_table_asof(cls,
                             dolt: Dolt,
                             table_name: str,
                             commit: str = None) -> pd.DataFrame:
        """Read all rows of ``table_name`` from ``dolt``, as of ``commit`` when given.

        NOTE(review): ``table_name`` and ``commit`` are interpolated straight
        into the SQL text; callers must not pass untrusted values.
        """
        base_query = f'SELECT * FROM `{table_name}`'
        if commit:
            return read_table_sql(dolt, f'{base_query} AS OF "{commit}"')
        else:
            return read_table_sql(dolt, base_query)