Example #1
def run_test(test_repo: Dolt, test: str) -> str:
    # ensure table is removed
    if TEST_TABLE in [t.name for t in test_repo.ls()]:
        test_repo.table_rm(TEST_TABLE)

    sysbench_args = [
        'sysbench',
        test,
        '--table-size=1000000',
        '--db-driver=mysql',
        '--mysql-db={}'.format(test_repo.repo_name),
        '--mysql-user=root',
        '--mysql-host=127.0.0.1',
    ]

    # Prepare the test
    prepare_exitcode, prepare_output = _execute(sysbench_args + ['prepare'],
                                                os.getcwd())
    if prepare_exitcode != 0:
        logger.error(prepare_output)
        raise SysbenchFailureException(test, 'prepare', prepare_output)

    # Run the test
    run_exitcode, run_output = _execute(sysbench_args + ['run'], os.getcwd())
    if run_exitcode != 0:
        logger.error(run_output)
        raise SysbenchFailureException(test, 'run', run_output)

    return run_output
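A minimal sketch of calling run_test; the repo path and the 'oltp_read_only' sysbench test name are placeholders, and a Dolt SQL server is assumed to already be listening on 127.0.0.1:

# Hypothetical usage sketch for run_test
from doltpy.core import Dolt  # import path may differ across doltpy versions

repo = Dolt('path/to/benchmark-repo')  # assumed existing repo whose SQL server is running
output = run_test(repo, 'oltp_read_only')
print(output)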
Example #2
def commitData(repo: Dolt, table: str, message: str) -> bool:
    # Check to ensure changes need to be added first
    if not repo.status().is_clean:
        repo.add(table)
        repo.commit(message)
        return True
    return False
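For example, a caller might commit a freshly written table like this (the repo path, table, and message are placeholders):

# Hypothetical usage sketch for commitData
repo = Dolt('path/to/repo')
if commitData(repo, table='tweets', message='Archive latest tweets'):
    print('Changes committed')
else:
    print('Working tree clean; nothing to commit')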
Example #3
def addTweetToDatabase(repo: Dolt, table: str, data: dict):
    # TODO: Figure Out If Tweet Still Accessible Despite Some Error Messages
    if 'errors' in data and data['errors'][0]['parameter'] == 'id':
        errorMessage = archiveErrorMessage(data)

        update_statement = '''
            UPDATE {table}
            set
                isDeleted="{isDeleted}",
                json="{json}"
            where
                id={id}
        '''.format(table=table,
                   id=errorMessage['id'],
                   isDeleted=errorMessage['isDeleted'],
                   json=errorMessage['json'])

        repo.sql(update_statement, result_format='csv')
        return

    # Tweet Data
    tweet = extractTweet(data)
    df = getDataFrame(tweet)

    # Use `python3 this-script.py --log=VERBOSE` in order to see this output
    logger.log(VERBOSE, json.dumps(tweet, indent=4))

    writeData(repo=repo, table=table, dataFrame=df, requiredKeys=['id'])
Example #4
    def __init__(self, run=None, database: str = ".", branch: str = 'master'):
        """
        Initialize a new context for Dolt operations with Metaflow.

        run: this is either
            - a FlowSpec when initialized with a running Flow
            - a Flow when looking across for data read/written across runs of a Flow
            - a Run when looking for data read/written by a specific run
        database: this is a path to a location on the filesystem with a Dolt database
        """
        self.run = run
        self.database = database
        self.branch = branch
        self.meta_database = "."

        self.doltdb = Dolt(self.database)
        try:
            self.meta_doltdb = Dolt(os.getcwd())
        except Exception:  # current working directory is not yet a Dolt repo, so initialize one
            self.meta_doltdb = Dolt.init(os.getcwd())

        current_branch, _ = self.doltdb.branch()
        self.entry_branch = None
        if current_branch.name != branch:
            self.entry_branch = current_branch.name
            self.doltdb.checkout(branch, checkout_branch=False)

        self.table_reads = []
        self.table_writes = []
Example #5
def setDeletedStatus(repo: Dolt, table: str, tweet_id: str, deleted: bool):
    tweets: Table = Table(table)
    query: QueryBuilder = Query.update(tweets) \
        .set(tweets.isDeleted, int(deleted)) \
        .where(tweets.id == tweet_id)

    repo.sql(query=query.get_sql(quote_char=None), result_format='csv')
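Marking a single tweet as deleted is then a one-liner; the repo path, table name, and tweet id below are assumptions:

# Hypothetical usage sketch for setDeletedStatus
repo = Dolt('path/to/tweets-repo')
setDeletedStatus(repo, table='tweets', tweet_id='1234567890', deleted=True)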
Example #6
def initRepo(path: str, create: bool, url: str = None) -> Dolt:
    # Prepare Repo For Data
    if create:
        repo = Dolt.init(path)
        repo.remote(add=True, name='origin', url=url)
        return repo

    return Dolt(path)
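Both branches can be exercised as in this sketch; the paths and remote URL are placeholders:

# Hypothetical usage sketch for initRepo
new_repo = initRepo('data/new-repo', create=True,
                    url='https://doltremoteapi.dolthub.com/some-org/some-repo')
existing_repo = initRepo('data/existing-repo', create=False)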
Example #7
    def initRepo(self, path: str, create: bool, url: str = None):
        # Prepare Repo For Data
        if create:
            repo = Dolt.init(path)
            repo.remote(add=True, name='origin', url=url)
            self.repo: Dolt = repo
            return

        self.repo: Dolt = Dolt(path)
Example #8
def setStreamJSON(repo: Dolt, table: str, tweet_id: str, data: dict):
    sql_converter: conversion.MySQLConverter = conversion.MySQLConverter()
    escaped_json: str = sql_converter.escape(value=json.dumps(data))

    tweets: Table = Table(table)
    query: QueryBuilder = Query.update(tweets) \
        .set(tweets.stream_json, escaped_json) \
        .where(tweets.id == tweet_id)

    repo.sql(query=query.get_sql(quote_char=None), result_format="csv")
Example #9
def load_to_dolthub(loader_or_loaders: Union[DoltLoader, List[DoltLoader]],
                    clone: bool,
                    push: bool,
                    remote_name: str,
                    remote_url: str,
                    dolt_dir: str = None,
                    dry_run: bool = False):
    """
    This function takes a `DoltLoader` (or a list of them) along with repo and remote settings, and attempts to
    execute each loader against the repo.
    :param loader_or_loaders:
    :param dolt_dir:
    :param clone:
    :param push:
    :param remote_name:
    :param dry_run:
    :param remote_url:
    :return:
    """
    if isinstance(loader_or_loaders, list):
        loaders = loader_or_loaders
    else:
        loaders = [loader_or_loaders]

    if clone:
        assert remote_url, 'If clone is True then remote must be passed'
        temp_dir = tempfile.mkdtemp()
        logger.info('Clone is set to true, so ignoring dolt_dir')
        logger.info('Clone set to True, cloning remote {}'.format(remote_url))
        repo = Dolt.clone(remote_url, temp_dir)
    else:
        assert os.path.exists(os.path.join(
            dolt_dir, '.dolt')), 'Repo must exist locally if not cloned'
        repo = Dolt(dolt_dir)

    logger.info('''Commencing to load to DoltHub with the following options:
                        - dolt_dir  {dolt_dir}
                        - clone     {clone}
                        - remote    {remote}
                        - push      {push}
        '''.format(dolt_dir=repo.repo_dir,
                   push=push,
                   clone=clone,
                   remote=remote_name))

    if not dry_run:
        for dolt_loader in loaders:
            branch = dolt_loader(repo)
            if push:
                logger.info('Pushing changes to remote {} on branch {}'.format(
                    remote_name, branch))
                repo.push(remote_name, branch)
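A DoltLoader here is just a callable that takes a repo and returns the branch it wrote to, so a call might look like the sketch below; the loader body, remote name, and remote URL are assumptions:

# Hypothetical usage sketch for load_to_dolthub
def my_loader(repo: Dolt) -> str:
    # write tables into `repo` here, then report the branch that was written
    return 'master'

load_to_dolthub(my_loader, clone=True, push=True,
                remote_name='origin', remote_url='some-org/some-repo')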
Example #10
def addMediaFiles(repo: Dolt, table: str, tweet_id: str, data: List[str]):
    sql_converter: conversion.MySQLConverter = conversion.MySQLConverter()
    escaped_json: str = sql_converter.escape(value=json.dumps(data))

    media: Table = Table(table)
    query: QueryBuilder = Query.into(media) \
        .insert(tweet_id, escaped_json)

    # query: QueryBuilder = Query.update(media) \
    #     .set(media.file, escaped_json) \
    #     .where(media.id == tweet_id)

    repo.sql(query=query.get_sql(quote_char=None), result_format="csv")
Example #11
def clone_or_pull_latest(remote_name, checkout_dir):
    """
    Clones the remote to the specified location if checkout_dir/.dolt does not exist, pulls the latest otherwise
    :param remote_name:
    :param checkout_dir:
    :return:
    """
    if os.path.exists(checkout_dir):
        repo = Dolt(checkout_dir)
        repo.pull()
        return repo
    else:
        return clone_repo(remote_name, checkout_dir)
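Typical use is to refresh a local checkout before reading from it; the remote and directory below are placeholders, and clone_repo is assumed to be defined elsewhere in the same module:

# Hypothetical usage sketch for clone_or_pull_latest
repo = clone_or_pull_latest('some-org/some-repo', 'checkouts/some-repo')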
Example #12
def loader(loader_builder: DoltLoaderBuilder, dolt_dir: str, clone: bool,
           push: bool, remote_name: str, dry_run: bool, remote_url: str):
    if clone:
        assert remote_url, 'If clone is True then remote must be passed'
        temp_dir = tempfile.mkdtemp()
        logger.info('Clone is set to true, so ignoring dolt_dir')
        logger.info('Clone set to True, cloning remote {}'.format(remote_url))
        repo = Dolt.clone(remote_url, temp_dir)
    else:
        assert os.path.exists(os.path.join(
            dolt_dir, '.dolt')), 'Repo must exist locally if not cloned'
        repo = Dolt(dolt_dir)

    logger.info('''Commencing to load to DoltHub with the following options:
                        - dolt_dir  {dolt_dir}
                        - clone     {clone}
                        - remote    {remote}
                        - push      {push}
        '''.format(dolt_dir=repo.repo_dir,
                   push=push,
                   clone=clone,
                   remote=remote_name))

    if not dry_run:
        loaders = loader_builder()
        for dolt_loader in loaders:
            branch = dolt_loader(repo)
            if push:
                logger.info('Pushing changes to remote {} on branch {}'.format(
                    remote_name, branch))
                repo.push(remote_name, branch)
Example #13
def _import_and_commit(dolt: Dolt, table: str, data: pd.DataFrame,
                       primary_keys: Optional[List[str]], import_mode: str):
    dolt_write.import_df(dolt, table, pd.DataFrame(data), primary_keys,
                         import_mode)
    dolt.add(table)
    dolt.commit('Executed import on table {} in import mode "{}"'.format(
        table, import_mode))
    commit = dolt.log()[0]

    return {
        'commit_hash': commit.hash,
        'timestamp': commit.ts,
        'author': commit.author,
        'message': commit.message
    }
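Called with a small DataFrame this returns metadata for the commit it creates; the table name and import mode below are illustrative ('update' is the mode used elsewhere in these examples), and `dolt` is assumed to be an existing Dolt instance:

# Hypothetical usage sketch for _import_and_commit
import pandas as pd

df = pd.DataFrame({'id': [1, 2], 'name': ['a', 'b']})
commit_info = _import_and_commit(dolt, 'names', df, primary_keys=['id'], import_mode='update')
print(commit_info['commit_hash'], commit_info['message'])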
Example #14
def lookupCurrentPresident(repo: Dolt) -> dict:
    current_time_query = '''
        SELECT CURRENT_TIMESTAMP;
    '''

    # I probably shouldn't be hardcoding the value of the query
    current_time = repo.sql(current_time_query,
                            result_format='csv')[0]['CURRENT_TIMESTAMP()']

    logger.debug("Current SQL Time: {}".format(current_time))

    current_president_query = '''
        select `Twitter User ID`, `Database Name` from presidents where `Start Term`<'{current_date}' and (`End Term`>'{current_date}' or `End Term` is null) limit 1;
    '''.format(current_date=current_time)

    return repo.sql(current_president_query, result_format='csv')
Example #15
def createTableIfNotExists(repo: Dolt, table: str) -> str:
    columns = '''
        `id` bigint unsigned NOT NULL,
        `date` datetime NOT NULL,
        `text` longtext NOT NULL,
        `device` longtext NOT NULL,
        `favorites` bigint unsigned NOT NULL,
        `retweets` bigint unsigned NOT NULL,
        `quoteTweets` bigint unsigned,
        `replies` bigint unsigned,
        `isRetweet` tinyint NOT NULL,
        `isDeleted` tinyint NOT NULL,
        `repliedToTweetId` bigint unsigned,
        `repliedToUserId` bigint unsigned,
        `repliedToTweetDate` datetime,
        `retweetedTweetId` bigint unsigned,
        `retweetedUserId` bigint unsigned,
        `retweetedTweetDate` datetime,
        `expandedUrls` longtext,
        `json` longtext,
        PRIMARY KEY (`id`)
    '''

    settings = '''
        ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
    '''

    create_table = '''
        CREATE TABLE IF NOT EXISTS {table} ({columns}) {settings}
    '''.format(table=table, columns=columns, settings=settings)

    return repo.sql(create_table, result_format='csv')
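Creating the archive table is then a single call; the repo path and table name are placeholders:

# Hypothetical usage sketch for createTableIfNotExists
repo = Dolt('path/to/archive-repo')
createTableIfNotExists(repo, 'tweets')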
Example #16
def handle_api(self):
    # Repo
    repo: Dolt = Dolt(config.ARCHIVE_TWEETS_REPO_PATH)
    table: str = config.ARCHIVE_TWEETS_TABLE

    # Determine Reply and Send It
    send_reply(self=self, repo=repo, table=table)
Example #17
def analyze_tweet(api: twitter.Api, status: twitter.models.Status,
                  INFO_QUIET: int = logging.INFO + 1,
                  VERBOSE: int = logging.DEBUG - 1):

    status_text = "12:00 A.M. on the Great Election Fraud of 2020!"  # status.full_text

    # This Variable Is Useful For Debugging Search Queries And Exploits
    original_phrase = get_search_keywords(text=status_text, search_word_query='analyze')

    repo: Dolt = Dolt(config.ARCHIVE_TWEETS_REPO_PATH)
    phrase = convert_search_to_query(phrase=original_phrase)

    search_results = database.search_tweets(search_phrase=phrase, repo=repo, table=config.ARCHIVE_TWEETS_TABLE)

    # Instantiate Text Processor
    analyzer: HostilityAnalysis = HostilityAnalysis(logger_param=logger, verbose_level=VERBOSE)

    # Load Tweets To Analyze
    for result in search_results:
        logger.log(VERBOSE, "Adding Tweet For Processing: {tweet_id} - {tweet_text}".format(tweet_id=result["id"],
                                                                                            tweet_text=result["text"]))
        analyzer.add_tweet_to_process(result)

    analyzer.preprocess_tweets()
    analyzer.process_tweets()
Example #18
def load_to_dolt(repo: Dolt, dolt_table: str, dolt_pks: List[str],
                 state_metadata: StateMetadata,
                 vote_file_builder: VoteFileBuilder,
                 table_data_builder: TableDataBuilder):
    """
    Load to the dolt dir/table specified using given columns for primary keys.
    :param repo:
    :param dolt_table:
    :param dolt_pks:
    :param state_metadata:
    :param vote_file_builder:
    :param table_data_builder:
    :return:
    """
    logger.info('''Loading data for state {}:
                - dolt_dir    : {}
                - dolt_table  : {}
                - dolt_pks    : {}   
            '''.format(state_metadata.state, repo.repo_dir(), dolt_table,
                       dolt_pks))
    table_data = files_to_table_data(state_metadata, vote_file_builder,
                                     table_data_builder)
    import_list(repo,
                dolt_table,
                table_data,
                dolt_pks,
                import_mode='update',
                batch_size=100000)
Example #19
def count_tweets(search_phrase: str,
                 repo: Dolt,
                 table: str,
                 account_id: Optional[int] = None,
                 hide_deleted_tweets: bool = False,
                 only_deleted_tweets: bool = False) -> int:
    tweets: Table = Table(table)
    query: QueryBuilder = Query.from_(tweets) \
        .select(Count(tweets.id)) \
        .orderby(tweets.id, order=Order.desc) \
        .where(Lower(tweets.text).like(
            search_phrase.lower()
        )  # TODO: lower(text) COLLATE utf8mb4_unicode_ci like lower('{search_phrase}')
    )

    if account_id is not None:
        # Show Results For Specific Account
        query: QueryBuilder = query.where(tweets.twitter_user_id == account_id)

    if hide_deleted_tweets:
        # Filter Out Deleted Tweets
        query: QueryBuilder = query.where(tweets.isDeleted == 0)
    elif only_deleted_tweets:
        # Only Show Deleted Tweets
        query: QueryBuilder = query.where(tweets.isDeleted == 1)

    # Perform Count Query
    count_result = repo.sql(query=query.get_sql(quote_char=None),
                            result_format="json")["rows"]

    # Retrieve Count of Tweets From Search
    for header in count_result[0]:
        return count_result[0][header]

    return -1
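A count of matching, non-deleted tweets could be requested as in this sketch; the LIKE wildcards, repo path, and table name are assumptions:

# Hypothetical usage sketch for count_tweets
repo = Dolt('path/to/tweets-repo')
total = count_tweets(search_phrase='%election%', repo=repo, table='tweets', hide_deleted_tweets=True)
print(f'{total} matching tweets')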
Example #20
def write_vote(election_year: int, stage: str, precinct: str, county: str,
               state: str, jurisdiction: str, candidate: str, party: str,
               writein: int, office: str, vote_mode: str, votes: int):
    logging.debug("-------------------------")
    logging.debug(f"Election Year: {election_year}")
    logging.debug(f"Stage: {stage.lower()}")
    logging.debug(f"Precinct: {precinct.upper()}")
    logging.debug(f"County: {county.upper()}")
    logging.debug(f"State: {state.upper()}")
    logging.debug(f"Jurisdiction: {jurisdiction.upper()}")
    logging.debug(f"Candidate: {candidate.upper()}")
    logging.debug(f"Party: {party.upper()}")
    logging.debug(f"Write-In: {writein}")
    logging.debug(f"Office: {office.upper()}")
    logging.debug(f"Candidate: {candidate.upper()}")
    logging.debug(f"Vote Mode: {vote_mode.upper()}")
    logging.debug(f"Votes: {votes}")
    logging.debug("-------------------------")

    # election_year,stage,precinct,county,state,jurisdiction,candidate,party,writein,office,vote_mode,votes
    insert_statement = f'''
        replace into vote_tallies (election_year,stage,precinct,county,state,jurisdiction,candidate,party,writein,office,vote_mode,votes)
        values ("{election_year}", "{stage.lower()}", "{precinct.upper()}", "{county.upper()}",
        "{state.upper()}", "{jurisdiction.upper()}", "{candidate.upper()}", "{party.upper()}", "{writein}",
        "{office.upper()}", "{vote_mode.upper()}", "{votes}");
    '''

    repo: Dolt = Dolt('working/us-president-precinct-results')

    try:
        # repo.sql(insert_statement)
        sql_statements_file.write(f"{insert_statement}\n")
    except DoltException as e:
        logging.error(f"Insertion Exception: {e}")
Example #21
    def read_table(self,
                   table_name: str,
                   commit: str = None,
                   flow_name: str = None,
                   run_id: str = None) -> pd.DataFrame:
        """
        Returns the specified tables as a DataFrame.
        """
        if not current.is_running_flow:
            raise ValueError("read_table is only supported in a running Flow")

        read_meta = self._get_table_read(table_name)

        if commit:
            table = self._get_dolt_table_asof(self.doltdb, table_name, commit)
            read_meta.commit = commit
        elif flow_name and run_id:
            df = read_table_sql(self.meta_doltdb,
                                _get_actions_query(flow_name, run_id, 'read'))
            database = df.database.values[0]
            commit = df.commit.values[0]
            # checkout database and get table ASOF commit
            db = Dolt(database)
            table = self._get_dolt_table_asof(db, table_name, commit)
            read_meta.commit = commit
        else:
            table = read_table(self.doltdb, table_name)
            read_meta.commit = self._get_latest_commit_hash()
        self.table_reads.append(read_meta)
        return table
Example #22
def retrieveAccountInfo(repo: Dolt, account_id: int) -> dict:
    government: Table = Table("government")
    query: QueryBuilder = Query.from_(government) \
        .select(Star()) \
        .where(government.twitter_user_id == account_id)

    return repo.sql(query=query.get_sql(quote_char=None),
                    result_format='json')["rows"]
Example #23
def lookupActiveAccounts(repo: Dolt) -> dict:
    government: Table = Table("government")
    query: QueryBuilder = Query.from_(government) \
        .select(government.twitter_user_id, government.first_name, government.last_name) \
        .where(government.archived == 0)

    return repo.sql(query=query.get_sql(quote_char=None),
                    result_format='json')["rows"]
Example #24
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dolt-path',
        help='The location of the Dolt database on the server machine')
    args = parser.parse_args()
    global DOLT
    DOLT = Dolt(args.dolt_path)
    app.run()
Example #25
def retrieveMissingBroadcastInfo(repo: Dolt, table: str) -> dict:
    tweets: Table = Table(table)
    query: QueryBuilder = Query.from_(tweets) \
        .select(tweets.id, tweets.expandedUrls) \
        .where(tweets.expandedUrls.like("https://twitter.com/i/broadcasts/%")) \
        .where(tweets.broadcast_json.isnull())

    return repo.sql(query=query.get_sql(quote_char=None),
                    result_format='json')["rows"]
Example #26
File: dolt.py Project: lucab/doltpy
def _server_helper(repo: Dolt, request):
    repo.sql_server()

    @retry(delay=2, tries=10, exceptions=(
            sqlalchemy.exc.OperationalError,
            sqlalchemy.exc.DatabaseError,
            sqlalchemy.exc.InterfaceError,
    ))
    def verify_connection():
        conn = repo.get_engine().connect()
        conn.close()
        return repo.get_engine()

    def finalize():
        if repo.server:
            repo.sql_server_stop()

    verify_connection()
    request.addfinalizer(finalize)
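In a pytest suite this helper would typically back a fixture that starts the SQL server and registers its shutdown; a sketch, assuming a separate `repo` fixture yields a Dolt instance:

# Hypothetical pytest fixture sketch built on _server_helper
import pytest

@pytest.fixture
def dolt_server(repo, request):
    _server_helper(repo, request)  # starts sql-server, verifies connectivity, registers stop finalizer
    return repo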
Example #27
def createTableIfNotExists(repo: Dolt, table: str):
    query: CreateQueryBuilder = Query.create_table(table=table) \
        .columns(
        Column("id", "bigint unsigned", nullable=False),
        Column("twitter_user_id", "bigint unsigned", nullable=False),

        Column("date", "datetime", nullable=False),
        Column("text", "longtext", nullable=False),
        Column("device", "longtext", nullable=False),

        Column("favorites", "bigint unsigned", nullable=False),
        Column("retweets", "bigint unsigned", nullable=False),
        Column("quoteTweets", "bigint unsigned"),
        Column("replies", "bigint unsigned"),

        Column("isRetweet", "tinyint", nullable=False),
        Column("isDeleted", "tinyint", nullable=False),

        Column("repliedToTweetId", "bigint unsigned"),
        Column("repliedToUserId", "bigint unsigned"),
        Column("repliedToTweetDate", "datetime"),

        Column("retweetedTweetId", "bigint unsigned"),
        Column("retweetedUserId", "bigint unsigned"),
        Column("retweetedTweetDate", "datetime"),

        Column("expandedUrls", "longtext"),

        Column("json", "longtext"),
        Column("json_v1", "longtext"),
        Column("notes", "longtext")
    ).primary_key("id")

    # TODO: Figure Out How To Add The Below Parameters
    # --------------------------------------------------------------------------------------------------------------
    # KEY `twitter_user_id_idx` (`twitter_user_id`),
    # CONSTRAINT `twitter_user_id_ref` FOREIGN KEY (`twitter_user_id`) REFERENCES `government` (`twitter_user_id`)
    # --------------------------------------------------------------------------------------------------------------
    # ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
    # --------------------------------------------------------------------------------------------------------------

    repo.sql(query=query.get_sql(quote_char=None), result_format="csv")
Example #28
def loader(loader_builder: DoltLoaderBuilder, dolt_dir: str, dry_run: bool):
    logger.info(
        '''Commencing load to Dolt with the following options:
                - dolt_dir  {dolt_dir}
        '''.format(dolt_dir=dolt_dir)
    )

    if not dry_run:
        loaders = loader_builder()
        for dolt_loader in loaders:
            dolt_loader(Dolt(dolt_dir))
Example #29
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--state', type=str, help='State to load data for', required=True)
    parser.add_argument('--load-data', )
    parser.add_argument('--dolt-dir', type=str, help='Dolt repo directory')
    parser.add_argument('--start-dolt-server', action='store_true')
    args = parser.parse_args()

    repo = Dolt(args.dolt_dir)
    if args.start_dolt_server:
        logger.info('start-dolt-server detected, starting server sub process')
        repo.sql_server(loglevel='trace')

    state_metadata = build_metadata_helper(args.state)

    load_to_dolt(repo,
                 'national_voting_data',
                 VOTING_DATA_PKS,
                 state_metadata,
                 filepath_to_precinct_file,
                 extract_precinct_voting_data)
Example #30
def write_candidate(candidate: str):
    insert_statement = f'''
        insert into candidates (name, fec, fec_name) values (\"{candidate.upper()}\", null, null);
    '''

    repo: Dolt = Dolt('working/us-president-precinct-results')

    try:
        # repo.sql(insert_statement)
        logging.info(f"Added Candidate: {candidate.upper()}")
    except DoltException:
        pass  # Silently Fail As Candidate Already In Database