Esempio n. 1
0
def calling_code(country=False):
    """Produce a calling code from the calling_codes table.

    Argument:
      country (str|bool): When given, look up that country's calling code;
      otherwise pick a random row.

    Returns:
      country (str): The country which corresponds to the calling code.
      calling_code (str): A variable length calling code.

    Example:
      >>> calling_code()
      AttrDict({u'calling_code': '961', 'country': u'Lebanon'})
      >>> calling_code("Denmark")
      AttrDict({u'calling_code': '45', 'country': u'Denmark'})
      >>> calling_code().country
      u'Guinea'
    """
    if country:
        query = ('SELECT country, calling_code FROM calling_codes WHERE '
                 'country LIKE :_country LIMIT 1;')
        result = engine.execute(text(query), _country=country)
    else:
        result = engine.execute(
            'SELECT country, calling_code FROM calling_codes ORDER BY '
            'random() LIMIT 1;'
        )
    first_row = dict(result.fetchall()[0])
    return AttrDict(first_row)
Esempio n. 2
0
def putDataForUserGermany():
    """Collect users tweeting from Germany, score their timelines, and
    persist one row per user into the germany_users table.

    Side effects: appends to germany_users via the module-level engine,
    de-duplicates the table by username, then disposes the engine.
    """
    global engine
    api = configscript.setupTwitterAuth()
    places = api.geo_search(query="Germany", granularity="country")
    place_id = places[0].id
    tweets = tw.Cursor(api.search,
                       q="place:%s" % place_id,
                       tweet_mode='extended',
                       lang='en').items()

    for tweet in tweets:
        username = tweet.user.screen_name
        # Fetch up to 150 of the user's own tweets (no retweets).
        allTweets = tw.Cursor(api.user_timeline,
                              screen_name=username,
                              tweet_mode="extended",
                              exclude_replies=False,
                              include_rts=False,
                              lang='en').items(150)
        listAllTweets = list(allTweets)
        if len(listAllTweets) < 1:
            continue

        tweetsDict = m.getTweetsDict(listAllTweets)
        score = m.getOverallScore(tweetsDict)
        # getOverallScore signals "no scorable tweets" with -1.
        if score == -1:
            continue

        tweetsonlyscore = m.tweetsOnlyScore(tweetsDict)
        scoremax = m.getHappiestTweet(tweetsonlyscore)
        scoremin = m.getSaddestTweet(tweetsonlyscore)

        # Renamed from 'dict' to stop shadowing the builtin.
        user_record = {
            username: {
                "score": score,
                "min": scoremin["score"],
                "max": scoremax["score"]
            }
        }

        df = pd.DataFrame.from_dict(user_record, orient='index')
        df.index.name = 'username'
        df.to_sql('germany_users', con=engine, if_exists='append')

        # Drop duplicate usernames, keeping the row with the lowest ctid
        # (PostgreSQL-specific de-duplication).
        engine.execute(
            "DELETE FROM germany_users T1 USING germany_users T2 WHERE  T1.ctid  < T2.ctid AND  T1.username = T2.username;"
        )

    engine.dispose()
Esempio n. 3
0
def delete():
    '''
    Delete images flagged for removal from Imgur.

    Scans every row of sci_mod and, for rows flagged as dmca or illegal that
    have an Imgur URL recorded, issues an HTTP DELETE against the Imgur API
    using the row's deletehash.

    NOTE(review): the Mashape key and Client-ID below are hard-coded
    credentials committed to source — rotate them and load from config.
    '''

    # if img['dmca'] == 1 or img['illegal'] == 1:
    #     print board + '/' +  img['threadID'] + "/" + img['local_thumbnail']
    #     query = '''UPDATE {0}_mod SET imgur_thumbnail_url = 'test' WHERE local_thumbnail = %s'''.format(board)
    #     engine.execute(query, img['local_thumbnail'])

    # to_delete = ["692LfMR1HKotuic","nbxl2ZAcOTO2eoz", "nrsNcrYqUV0A9rH"]
    to_delete = engine.execute("SELECT * FROM sci_mod")

    # for row in to_delete:
    #     if row['dmca'] == 1 or row['illegal'] == 1:
    #         print "something"


    for row in to_delete:
        # Only delete rows that are flagged AND were actually mirrored to Imgur.
        if (row['dmca'] == 1 or row['illegal'] == 1) and (row['imgur_thumbnail_url'] is not None or row['imgur_image_url'] is not None):
        # client.delete_image(img)
        # # These code snippets use an open-source library.
        # These code snippets use an open-source library. http://unirest.io/python
            response = unirest.delete("https://imgur-apiv3.p.mashape.com/3/image/{}".format(row['imgur_deletehash']),
                                      headers={
                                          "X-Mashape-Key": "y4mVnnNiZBmshBb9s7rh4DSw6K53p1T5SDujsnxxEUEvw62m4L",
                                          "Authorization": "Client-ID 999ee789e084f2e",
                                          "Accept": "text/plain"
                                      }
                                      )
            print response.body['status']

            if response.body['status'] == 200:
                print "was successful"
Esempio n. 4
0
def delete():
    '''
    Delete images flagged for removal from Imgur.

    Duplicate of the variant above: scans sci_mod and issues an HTTP DELETE
    for each flagged row that has an Imgur URL, keyed by its deletehash.

    NOTE(review): the Mashape key and Client-ID below are hard-coded
    credentials committed to source — rotate them and load from config.
    '''


    to_delete = engine.execute("SELECT * FROM sci_mod")




    for row in to_delete:
        # Only delete rows that are flagged AND were actually mirrored to Imgur.
        if (row['dmca'] == 1 or row['illegal'] == 1) and (row['imgur_thumbnail_url'] is not None or row['imgur_image_url'] is not None):
        # client.delete_image(img)
        # # These code snippets use an open-source library.
        # These code snippets use an open-source library. http://unirest.io/python
            response = unirest.delete("https://imgur-apiv3.p.mashape.com/3/image/{}".format(row['imgur_deletehash']),
                                      headers={
                "X-Mashape-Key": "y4mVnnNiZBmshBb9s7rh4DSw6K53p1T5SDujsnxxEUEvw62m4L",
                "Authorization": "Client-ID 999ee789e084f2e",
                "Accept": "text/plain"
              }
            )
            print response.body['status']

            if response.body['status'] == 200:
                print "was successful"
Esempio n. 5
0
def state_abbreviated():
    """Return a random two-letter US state abbreviation, e.g. 'NY'."""
    query = ("SELECT abbreviation FROM states "
             "ORDER BY random() LIMIT 1;")
    rows = engine.execute(query).fetchall()
    return AttrDict(dict(rows[0]))
Esempio n. 6
0
def city_with_state():
    """Return a random US city joined with its state, e.g. 'New York, NY'."""
    rows = engine.execute("SELECT city, state FROM us_cities "
                          "ORDER BY random() LIMIT 1;").fetchall()
    city, state = rows[0]
    return ', '.join((city, state))
Esempio n. 7
0
def putDataDB():
    """Fetch tweets geo-tagged to Denmark, store them in the tweets table,
    and de-duplicate the table afterwards."""
    global engine
    api = configscript.setupTwitterAuth()
    denmark_places = api.geo_search(query="Denmark", granularity="country")
    denmark_id = denmark_places[0].id
    tweet_cursor = tw.Cursor(api.search,
                             q="place:%s" % denmark_id,
                             tweet_mode='extended',
                             lang='en').items()
    frame = pd.DataFrame.from_dict(m.getTweetsDictRaw(tweet_cursor),
                                   orient='index')
    frame.set_index('id', inplace=True)
    frame.to_sql('tweets', con=engine, if_exists='append')

    # Remove duplicate tweets (PostgreSQL ctid-based de-duplication).
    engine.execute(
        "DELETE FROM tweets T1 USING tweets T2 WHERE  T1.ctid  < T2.ctid AND  T1.id    = T2.id AND  T1.score = T2.score AND  T1.created = T2.created;"
    )

    engine.dispose()
Esempio n. 8
0
    def _populate_lineage(self) -> None:
        url = self.config.get_sql_alchemy_url()
        logger.debug(f"sql_alchemy_url={url}")
        engine = create_engine(
            url,
            connect_args=self.config.get_sql_alchemy_connect_args(),
            **self.config.options,
        )
        query: str = """
WITH table_lineage_history AS (
    SELECT
        r.value:"objectName" AS upstream_table_name,
        r.value:"objectDomain" AS upstream_table_domain,
        r.value:"columns" AS upstream_table_columns,
        w.value:"objectName" AS downstream_table_name,
        w.value:"objectDomain" AS downstream_table_domain,
        w.value:"columns" AS downstream_table_columns,
        t.query_start_time AS query_start_time
    FROM
        (SELECT * from snowflake.account_usage.access_history) t,
        lateral flatten(input => t.DIRECT_OBJECTS_ACCESSED) r,
        lateral flatten(input => t.OBJECTS_MODIFIED) w
    WHERE r.value:"objectId" IS NOT NULL
    AND w.value:"objectId" IS NOT NULL
    AND w.value:"objectName" NOT LIKE '%.GE_TMP_%'
    AND w.value:"objectName" NOT LIKE '%.GE_TEMP_%'
    AND t.query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
    AND t.query_start_time < to_timestamp_ltz({end_time_millis}, 3))
SELECT upstream_table_name, downstream_table_name, upstream_table_columns, downstream_table_columns
FROM table_lineage_history
WHERE upstream_table_domain in ('Table', 'External table') and downstream_table_domain = 'Table'
QUALIFY ROW_NUMBER() OVER (PARTITION BY downstream_table_name, upstream_table_name ORDER BY query_start_time DESC) = 1        """.format(
            start_time_millis=int(self.config.start_time.timestamp() * 1000),
            end_time_millis=int(self.config.end_time.timestamp() * 1000),
        )
        num_edges: int = 0
        self._lineage_map = defaultdict(list)
        try:
            for db_row in engine.execute(query):
                # key is the down-stream table name
                key: str = db_row[1].lower().replace('"', "")
                self._lineage_map[key].append(
                    # (<upstream_table_name>, <json_list_of_upstream_columns>, <json_list_of_downstream_columns>)
                    (db_row[0].lower().replace('"', ""), db_row[2], db_row[3]))
                num_edges += 1
                logger.debug(
                    f"Lineage[Table(Down)={key}]:Table(Up)={self._lineage_map[key]}"
                )
        except Exception as e:
            logger.warning(
                f"Extracting lineage from Snowflake failed."
                f"Please check your premissions. Continuing...\nError was {e}."
            )
        logger.info(
            f"A total of {num_edges} Table->Table edges found"
            f" for {len(self._lineage_map)} downstream tables.", )
        self.report.num_table_to_table_edges_scanned = num_edges
Esempio n. 9
0
def rowExists(engine, primKey, tableName, primColName):
    """Return True if a row whose primary-key column equals primKey exists.

    Args:
      engine: SQLAlchemy-style engine exposing execute().
      primKey: primary-key value to look up (stringified before use).
      tableName (str): table to query.
      primColName (str): name of the primary-key column.

    Returns:
      bool: True when at least one matching row exists.
    """
    primKey = str(primKey)
    # SECURITY NOTE: identifiers and the key are interpolated unescaped into
    # the SQL string — callers must not pass untrusted input.
    query = "SELECT * FROM %s WHERE %s=\'%s\'" % (tableName, primColName,
                                                  primKey)
    result = engine.execute(query)
    # Short-circuit on the first row instead of counting every match.
    return any(True for _ in result)
Esempio n. 10
0
def batchInsert(engine):
    """Execute every ';'-separated SQL command from sampledata.sql.

    Commands that fail are reported and skipped (e.g. DROP TABLE before the
    tables exist).
    """
    connection = engine.connect()
    # 'with' guarantees the file handle is closed even if read() raises.
    with open('sampledata.sql', 'r') as fd:
        sqlFile = fd.read()

    # all SQL commands (split on ';')
    sqlCommands = sqlFile.split(';')

    # Execute every command from the input file
    for command in sqlCommands:
        # This will skip and report errors
        # For example, if the tables do not yet exist, this will skip over
        # the DROP TABLE commands
        try:
            engine.execute(command)
        except Exception:
            # Narrowed from a bare 'except' so KeyboardInterrupt and
            # SystemExit still propagate.
            print("error")
    connection.close()
Esempio n. 11
0
def db_name(db_host: str, db_user: str) -> typing.Iterable[str]:
    """Create a throwaway tribble_test_* schema, yield its name, then drop it.

    Generator fixture: also drops any tribble_test_* schemas left behind by
    earlier aborted runs. Admin credentials come from the
    TRIBBLE_DB_ADMIN_USER / TRIBBLE_DB_ADMIN_PASSWORD environment variables.
    """
    admin_user = os.environ.get('TRIBBLE_DB_ADMIN_USER', 'root')
    admin_password = os.environ.get('TRIBBLE_DB_ADMIN_PASSWORD')

    creds = database.Creds(host=db_host,
                           user=admin_user,
                           password=admin_password,
                           database='mysql')

    with warnings.catch_warnings():
        warnings.filterwarnings('ignore',
                                ".*\'@@tx_isolation\' is deprecated.*")
        engine = database.connect_db(creds)
        connection = engine.connect()

        db_names = connection.execute('SHOW DATABASES;').fetchall()

        # Clean up schemas orphaned by previous aborted test runs.
        for (db_name, ) in db_names:
            if db_name.startswith('tribble_test_'):
                # Use the already-open connection for consistency with every
                # other statement in this function (was engine.execute).
                connection.execute(f'DROP SCHEMA {db_name}')

        database_name = 'tribble_test_{0:0>6}'.format(
            random.randrange(1, 1000000))
        connection.execute(f'CREATE SCHEMA {database_name};')

        connection.execute(f'USE {database_name};')
        connection.execute(
            f'GRANT ALL ON {database_name}.* TO {db_user}@{db_host};')
        connection.execute('FLUSH PRIVILEGES;')

        connection.close()

        yield database_name

        # Teardown: reconnect, then remove the schema and its grants.
        connection = engine.connect()

        connection.execute(f'DROP SCHEMA {database_name};')
        connection.execute(
            f'REVOKE ALL ON {database_name}.* FROM {db_user}@{db_host};')
        connection.execute('FLUSH PRIVILEGES;')
        connection.close()
Esempio n. 12
0
def zipcode(state=None):
    """Pick a random zipcode from the zipcode range of a state.

    Argument:
      state (str): 2-letter state abbreviation; a random state is used
      when omitted.

    Returns:
      str: a 5-digit, zero-padded zipcode, e.g. '11221'.
    """
    range_gen = []
    state = state or state_abbreviated().abbreviation
    cmd = 'SELECT min, max FROM zipcodes WHERE st = :_state'
    res = engine.execute(text(cmd), _state=state)
    _range = res.fetchall()
    for r in _range:
        # Cast BEFORE adding 1: drivers may return min/max as strings, and
        # the original int(r[1] + 1) would raise TypeError on str + int.
        range_gen.extend(range(int(r[0]), int(r[1]) + 1))
    return '%05d' % choice(range_gen)
Esempio n. 13
0
def male():
    """Generate a 'male' name.

    Returns:
      A string.

    Example:
      >>> name()
      'Anthony'
    """
    rows = engine.execute(
        "SELECT name FROM male ORDER BY random() LIMIT 1;").fetchall()
    return AttrDict(dict(rows[0]))
Esempio n. 14
0
    def test_retval_flag(self):
        """Verify retval=True listener behavior: 'begin' must reject it,
        while before_execute / before_cursor_execute honor returned values."""
        canary = []

        def tracker(name):
            # Build a listener that records its name when fired.
            def go(conn, *args, **kw):
                canary.append(name)

            return go

        def execute(conn, clauseelement, multiparams, params):
            canary.append("execute")
            return clauseelement, multiparams, params

        def cursor_execute(
            conn, cursor, statement, parameters, context, executemany
        ):
            canary.append("cursor_execute")
            return statement, parameters

        engine = engines.testing_engine()

        # 'begin' is not a retval-capable event: listening with retval=True
        # must raise ArgumentError.
        assert_raises(
            tsa.exc.ArgumentError,
            event.listen,
            engine,
            "begin",
            tracker("begin"),
            retval=True,
        )

        event.listen(engine, "before_execute", execute, retval=True)
        event.listen(
            engine, "before_cursor_execute", cursor_execute, retval=True
        )
        # Engine.execute is deprecated in this codebase; the deprecation
        # warning is expected and asserted here.
        with testing.expect_deprecated(
            r"The argument signature for the "
            r"\"ConnectionEvents.before_execute\" event listener",
        ):
            engine.execute(select(1))
        eq_(canary, ["execute", "cursor_execute"])
Esempio n. 15
0
def get_threads(url, board, dir):
    """Reset per-pass flags on the b_temp thread table and probe whether a
    given thread ID already exists.

    NOTE(review): the MySQL password is hard-coded in the connection URL —
    move it into configuration. The url/board/dir parameters are unused in
    this body; confirm intended use. `dir` also shadows the builtin.
    """




    engine = create_engine('mysql+pymysql://root:magical18'
                               '@localhost/chanarive')

    print engine

    # metadata = MetaData()
    # chanarive = Table('b_temp', metadata,
    #     Column('id', Integer(), primary_key=True),
    #     Column('threadID', String()),
    #     Column('position', Integer()),
    #     Column('previous_position', Integer()),
    #
    #     )
    # metadata.create_all(engine)


    # ins = insert('b_temp').values(
    #             # post_id is auto inserted
    #             threadID='324325',
    #             position=4,
    #             previous_position=64,
    #
    #         )

    # insert into database the parsed logic
    # engine.execute("INSERT INTO b_temp (threadID,position,previous_position) VALUES ('2343', 34, 54)")
    # rows = 0
    # item = engine.execute("SELECT * FROM b_temp WHERE threadID = '689718729'")
    # num_rows = engine.execute("SELECT COUNT(*) FROM b_temp")
    # for i in num_rows:
    #     rows = i[0]
    # Reset the per-pass bookkeeping columns before re-parsing the board.
    engine.execute("UPDATE b_temp SET remove = 1 WHERE remove = 0")
    engine.execute("UPDATE b_temp SET bot = 0 WHERE bot = 1")
    engine.execute("UPDATE b_temp SET previous_position = current_position")


    # creat a for each loop of all parsed threads
    # EXISTS probe: the query returns a single 1/0 flag row.
    items = engine.execute("SELECT IF ( EXISTS( SELECT * FROM b_temp WHERE threadID = '{}'), 1, 0)".format('690s617627'))
    # items = engine.execute("SELECT remove FROM b_temp")
    # print len(items)
    for i in items:
         if i[0]:
             # update rank
             print "It exists!"
         else:
             # insert into database
             print "404!"
Esempio n. 16
0
def gender(extended=False):
    """Return a random gender.

    Argument:
      extended (bool): Pick from Female or Male when False; when True,
      pick from the 50+ entries in the gender_extended table.
    """
    if not extended:
        return choice(['Male', 'Female'])
    rows = engine.execute("SELECT gender FROM gender_extended "
                          "ORDER BY random() LIMIT 1;").fetchall()
    return AttrDict(dict(rows[0]))
Esempio n. 17
0
def lat_long(state="NY", radius=150000):
    """Generates a random latitude, and longitude.

    The lat and long will be within the radius, of the state of your choice.

    Note: The radius seems to need to be over 150,000 to produce a difference
    from the original lat and long.

    Arguments:
      state (str): The 2 letter abbreviation for the state of your choosing.
      radius (int): The radius which will be used to generate a lat and long
      inside of.

    Returns:
      abbrev (string): The 2 letter abbreviation of the chosen state.
      lat (string): A generated latitude of varying length.
      long (string): A generated longetude of varying length.

    Examples:
      >>> d_gen.lat_long("NY", 150000)
      AttrDict({
        u'lat': '41.9656885445',
        u'abbrev': u'NY',
        u'long': '-75.9459285158'
      })
      >>> lat_long("NY", 175000).lat
      '43.0438318157'
    """
    # Center point: the state's reference lat/long from the us_s_ll table.
    cmd = 'SELECT abbrev,lat,long FROM us_s_ll WHERE abbrev = :_st LIMIT 1;'
    res = engine.execute(text(cmd), _st=state)
    data = AttrDict([dict(d) for d in res.fetchall()][0])

    # ~111,300 meters per degree of latitude.
    radius_in_degrees = radius / 111300

    x0 = float(data["long"])
    y0 = float(data["lat"])

    # Random point in a disc: sqrt(u) gives uniform density over the area.
    u = round(uniform(0.1, 1.0), 6)
    v = round(uniform(0.1, 1.0), 6)

    w = radius_in_degrees * sqrt(u)
    t = 2 * pi * v
    x = w * cos(t)
    y1 = w * sin(t)
    # NOTE(review): cos() expects radians but y0 is a latitude in degrees —
    # this longitude correction looks like a latent bug; confirm intended
    # behavior (the docstring examples were produced by this code as-is).
    x1 = x / cos(y0)

    obj = LatLon(y0 + y1, x0 + x1).to_string()
    data["lat"] = obj[0]
    data["long"] = obj[1]

    return data
Esempio n. 18
0
def mime_type():
    """Generates a random mime type.

    Returns:
      name (str): The full name of the mime type.
      extension (str): The file extension of the mime type.

    Examples:
      >>> mime_type()
      AttrDict({u'name': u'application.x-excel', u'extension': u'.xlv'})
    """
    # NOTE: the backslash continuation embeds the next line's leading
    # whitespace inside the SQL string literal; harmless, SQL ignores it.
    res = engine.execute("SELECT name, extension FROM mimes ORDER BY  \
                         random() LIMIT 1;")
    return AttrDict([dict(d) for d in res.fetchall()][0])
Esempio n. 19
0
def celebrityScore(username):
    """Score a celebrity's recent timeline and persist it to the celebrity
    table along with their profile picture URL.

    Argument:
      username (str): Twitter screen name to score.

    Side effects: appends to the celebrity table via the module-level
    engine, de-duplicates it by username, then disposes the engine.
    Returns early (None) when the user has no tweets or no usable score.
    """
    print(username)
    global engine
    api = configscript.setupTwitterAuth()
    allTweets = tw.Cursor(api.user_timeline,
                          screen_name=username,
                          tweet_mode="extended",
                          exclude_replies=False,
                          include_rts=False,
                          lang='en').items()
    listAllTweets = list(allTweets)

    if len(listAllTweets) == 0:
        return

    tweetsDict = m.getTweetsDict(listAllTweets)
    score = m.getOverallScore(tweetsDict)

    # getOverallScore signals "no scorable tweets" with -1.
    if score == -1:
        return

    user = api.get_user(username)
    # Remove _normal from profile image URL
    profile_image_url = user.profile_image_url_https
    url = re.sub('_normal', '', profile_image_url)

    # Renamed from 'dict' to stop shadowing the builtin.
    record = {username: {"score": score, "pic": url}}

    df = pd.DataFrame.from_dict(record, orient='index')
    df.index.name = 'username'
    df.to_sql('celebrity', con=engine, if_exists='append')

    # Drop duplicate usernames, keeping the row with the lowest ctid
    # (PostgreSQL-specific de-duplication).
    engine.execute(
        "DELETE FROM celebrity T1 USING celebrity T2 WHERE  T1.ctid  < T2.ctid AND  T1.username = T2.username;"
    )

    engine.dispose()
Esempio n. 20
0
def areacode(state=None):
    """Returns a random zipcode from a list of US zipcodes.

    Argument:
      state (str): 2 letter state abbreviation.

    Returns:
      areacode (unicode): 3 digit area code.

    Examples:
      >>> areacode()
      '810'
      >>> areacode('NY')
      '718'
    """
    if state:
        query = ('SELECT areacode, state FROM areacodes WHERE state = :_state '
                 'ORDER BY RANDOM() LIMIT 1;')
        result = engine.execute(text(query), _state=state)
    else:
        result = engine.execute(
            'SELECT areacode, state FROM areacodes ORDER BY random() LIMIT 1;'
        )
    return AttrDict(dict(result.fetchall()[0]))
Esempio n. 21
0
def timezone_offset(dst=True, utc=True):
    # Todo: Set both to false by default.
    """Generates a random timezone offset.

    Arguments:
      dst (bool): Enable dst selection.
      utc (bool): Enable utc offset.

    Returns:
      dst: Returns a dst offset.
      utc: Returns a utc offset.

    Examples:
      >>> timezone_offset()
      {'utc': u'+06:30', 'dst': u'-03:00'}
      >>> timezone_offset(dst=False)
      {'utc': u'+10:00'}
      >>> timezone_offset(utc=False)
      {'dst': u'-03:00'}
      >>> timezone_offset(utc=False, dst=False)
      {}
      >>> timezone_offset(utc=False).dst
      '-4:30'
    """
    offsets = {}
    if dst:
        rows = engine.execute("SELECT DISTINCT(dst) FROM timezones ORDER BY "
                              "random() LIMIT 1;").fetchall()
        offsets["dst"] = rows[0][0]

    if utc:
        rows = engine.execute("SELECT DISTINCT(utc) FROM timezones ORDER BY "
                              "random() LIMIT 1;").fetchall()
        offsets["utc"] = rows[0][0]

    return AttrDict(offsets)
Esempio n. 22
0
def timezone_offset_country():
    """Generates a random country from the timezone country list.

    Returns:
      country (str): Name of the generated country.

    Examples:
      >>> timezone_offset_country()
      AttrDict({'country': u'Maldives'})
      >>> timezone_offset_country().country
      u'Maldives'
    """
    res = engine.execute("SELECT country FROM timezones ORDER BY random() "
                         "LIMIT 1;")
    return AttrDict({"country": res.fetchall()[0][0]})
Esempio n. 23
0
def _mysql_get_effective_sql_mode(engine):
    """Returns the effective SQL mode for connections from the engine pool.

    Returns ``None`` if the mode isn't available, otherwise returns the mode.

    """
    # Get the real effective SQL mode. Even when unset by
    # our own config, the server may still be operating in a specific
    # SQL mode as set by the server configuration.
    # Also note that the checkout listener will be called on execute to
    # set the mode if it's registered.
    row = engine.execute("SHOW VARIABLES LIKE 'sql_mode'").fetchone()
    if row is None:
        return
    return row[1]
Esempio n. 24
0
    def _populate_view_upstream_lineage(self, engine: sqlalchemy.engine.Engine) -> None:
        """Append Upstream->View edges to ``self._lineage_map`` from
        Snowflake's ``account_usage.object_dependencies`` view.

        Each edge is stored as (upstream_name, "[]", "[]") — empty JSON
        column lists, since this query carries no column-level lineage.
        Updates ``self.report.num_table_to_view_edges_scanned``.
        """
        # NOTE: This query captures only the upstream lineage of a view (with no column lineage).
        # For more details see: https://docs.snowflake.com/en/user-guide/object-dependencies.html#object-dependencies
        # and also https://docs.snowflake.com/en/sql-reference/account-usage/access_history.html#usage-notes for current limitations on capturing the lineage for views.
        view_upstream_lineage_query: str = """
SELECT
  concat(
    referenced_database, '.', referenced_schema,
    '.', referenced_object_name
  ) AS view_upstream,
  concat(
    referencing_database, '.', referencing_schema,
    '.', referencing_object_name
  ) AS downstream_view
FROM
  snowflake.account_usage.object_dependencies
WHERE
  referencing_object_domain in ('VIEW', 'MATERIALIZED VIEW')
        """

        assert self._lineage_map is not None
        num_edges: int = 0

        try:
            for db_row in engine.execute(view_upstream_lineage_query):
                # Process UpstreamTable/View/ExternalTable/Materialized View->View edge.
                view_upstream: str = db_row["view_upstream"].lower()
                view_name: str = db_row["downstream_view"].lower()
                # Skip views excluded by the source's allow/deny patterns.
                if not self._is_dataset_allowed(dataset_name=view_name, is_view=True):
                    continue
                # key is the downstream view name
                self._lineage_map[view_name].append(
                    # (<upstream_table_name>, <empty_json_list_of_upstream_table_columns>, <empty_json_list_of_downstream_view_columns>)
                    (view_upstream, "[]", "[]")
                )
                num_edges += 1
                logger.debug(
                    f"Upstream->View: Lineage[View(Down)={view_name}]:Upstream={view_upstream}"
                )
        except Exception as e:
            # Lineage is best-effort: record a warning and carry on.
            self.warn(
                logger,
                "view_upstream_lineage",
                "Extracting the upstream view lineage from Snowflake failed."
                + f"Please check your permissions. Continuing...\nError was {e}.",
            )
        logger.info(f"A total of {num_edges} View upstream edges found.")
        self.report.num_table_to_view_edges_scanned = num_edges
Esempio n. 25
0
def update_all_slugs_in_mtgmetaio(card_tname=MTGMETA_CARDS_TNAME,
                                  engine=engine) -> None:
    """Rewrite each outdated card slug in the cards table to its replacement,
    as reported by get_missing().

    NOTE(review): `old`/`new` are interpolated into the SQL via f-string —
    SQL injection risk if get_missing() ever returns untrusted values.
    NOTE: the `engine=engine` default binds the module-level engine at
    import time, not at call time.
    """
    logger.info("Updating card slugs")
    for old, new in get_missing().items():
        logger.debug(f"{old}: {new}")
        query = f"""
            UPDATE "{card_tname}"
            SET 
            "card_slug" = REPLACE (
                "card_slug",
                '{old}',
                '{new}'
            )
            WHERE "card_slug"='{old}'
        """
        a = engine.execute(query)
        logger.debug(a)
Esempio n. 26
0
def html_name():
    """Generates a random html color name.

    Returns:
      name (str): The name of the picked color.
      hex (str): The corresponding hex code of the color.

    Examples:
      >>> html_name()
      AttrDict({u'hex': u'80008', u'name': u'purple'})
      >>> html_name().hex
      '808080'
    """
    rows = engine.execute(
        "SELECT name, hex FROM html_colors ORDER BY RANDOM() LIMIT 1;"
    ).fetchall()
    return AttrDict(dict(rows[0]))
Esempio n. 27
0
def phone_number(state=None):
    """Generates a phone number in multiple formats.

    Conforms to NANP standards.

    Argument:
      state (string): Returns a phone number from an areacode in this
      specified state.

    Returns:
      areacode (string): 3 digit area code.
      domestic (string): Phone number formatted to the domestic dial standard.
      international (string): Phone number formatted to the international
      dial standard.
      local (string): Phone number formatted to the local dial standard.
      plain (string): Phone number without formatting.
      standard (string): Phone number formatted to the standard dial standard.
      state (string): The state the phone number corresponds to.

    Examples:
      >>> phone_number()
      AttrDict({
        'areacode': '562',
        'domestic': '(562) 422-9802',
        'international': '+1-562-422-9802',
        'local': '422-9802',
        'plain': '5624229802',
        'standard': '562-422-9802'
        'state': u'CA',
      })
      >>> phone_number().state
      'NY'
      >>> phone_number().international
      '+1-574-720-9722'
      >>> phone_number("NY").domestic
      '718-288-1000'
    """
    data = {}
    valid_number = False
    invalid_prefixes = ["911", "555", "311", "411"]

    # If the number that we generate appears in the invalid_prefixes
    # list, then we will regenerate until the chosen number is not.
    a, b, c = (False,) * 3
    while not valid_number:
        a = str(areacode(state).areacode) if state else str(
            areacode().areacode)
        b = str(randint(2, 9)) + str(number(2))
        # BUGFIX: the original `if a or b not in invalid_prefixes` parsed as
        # `a or (b not in invalid_prefixes)` — always true since `a` is a
        # non-empty string, so invalid prefixes were never regenerated.
        if a not in invalid_prefixes and b not in invalid_prefixes:
            break

    # Tack on 4 digits to the end.
    c = number(4)

    # Enter our data in to a dict.
    data["areacode"] = a
    data["local"] = "{0}-{1}".format(b, c)
    data["domestic"] = "({0}) {1}-{2}".format(a, b, c)
    data["international"] = "+1-{0}-{1}-{2}".format(a, b, c)
    data["standard"] = "{0}-{1}-{2}".format(a, b, c)
    data["plain"] = a + b + c

    if state:
        cmd = 'SELECT state FROM areacodes WHERE state = :_state'
        res = engine.execute(text(cmd), _state=state)
        for d in res.fetchall():
            data["state"] = d[0]

    return AttrDict(data)
Esempio n. 28
0
def language():
    """Picks a random language."""
    rows = engine.execute(
        "SELECT name FROM languages ORDER BY random() LIMIT 1;").fetchall()
    return AttrDict(dict(rows[0]))
Esempio n. 29
0
        # initialize columns
        self.item_price_total = item_price_total
        self.item_quantity_total = item_quantity_total

    def __repr__(self):
        # Debug representation keyed by the cart's primary key.
        return '<shoppingCart %r>' % self.shoppingCart_id


class Shoppingcartitems(db.Model):
    """Association table linking shopping carts to the items they contain.

    Composite primary key: (shopping_cartId, item_id).
    """
    __tablename__ = 'Shopping_cart_items'
    # FK to shoppingCart; part of the composite primary key.
    shopping_cartId = db.Column(db.Integer, db.ForeignKey('shoppingCart.shoppingCart_id'), nullable=False,
                                primary_key=True)
    shoppingCart = db.relationship("shoppingcart", backref=db.backref("shoppingCart", uselist=False))
    # FK to Items; second half of the composite primary key.
    item_id = db.Column(db.Integer, db.ForeignKey('Items.item_id'), nullable=False, primary_key=True)
    Items = db.relationship("Items", backref=db.backref("Items", uselist=False))

    def __init__(self, shopping_cartId, item_id):
        # initialize columns
        self.shopping_cartId = shopping_cartId
        self.item_id = item_id

# connect to server
engine = create_engine('mysql://%s:%s@%s' % (USER, PASSWORD, HOSTNAME))

# create db if not exists
# NOTE(review): DATABASE is interpolated directly into the statement; it is
# assumed to be a trusted configuration constant — confirm.
engine.execute("CREATE DATABASE IF NOT EXISTS %s " % (DATABASE))

# create all tables declared on the SQLAlchemy models above
db.create_all()
if __name__ == '__main__':
    manager.run()
Esempio n. 30
0
    def do_provision_role_internal(self):
        """Create (or recreate) the configured Snowflake role and grant it
        the privileges the connector needs, honoring dry-run mode.

        No-op when no provision_role block is configured. Updates
        ``self.report`` with the role used and success/done flags.
        """
        provision_role_block = self.config.provision_role
        if provision_role_block is None:
            return
        self.report.provision_role_done = not provision_role_block.dry_run

        role = self.config.role
        if role is None:
            # Fall back to a default role name, but surface a warning.
            role = "datahub_role"
            self.warn(
                logger,
                "role-grant",
                f"role not specified during provision role using {role} as default",
            )
        self.report.role = role

        warehouse = self.config.warehouse

        logger.info("Creating connection for provision_role")
        engine = self.get_metadata_engine(database=None)

        # Collect every statement first, then execute them in order below.
        sqls: List[str] = []
        if provision_role_block.drop_role_if_exists:
            sqls.append(f"DROP ROLE IF EXISTS {role}")

        sqls.append(f"CREATE ROLE IF NOT EXISTS {role}")

        if warehouse is None:
            self.warn(logger, "role-grant",
                      "warehouse not specified during provision role")
        else:
            sqls.append(
                f"grant operate, usage on warehouse {warehouse} to role {role}"
            )

        for inspector in self.get_inspectors():
            db_name = self.get_db_name(inspector)
            sqls.extend([
                f"grant usage on DATABASE {db_name} to role {role}",
                f"grant usage on all schemas in database {db_name} to role {role}",
                f"grant usage on future schemas in database {db_name} to role {role}",
            ])
            # Profiling needs to read table data (select); otherwise the
            # weaker 'references' privilege is enough for metadata.
            if self.config.profiling.enabled:
                sqls.extend([
                    f"grant select on all tables in database {db_name} to role {role}",
                    f"grant select on future tables in database {db_name} to role {role}",
                    f"grant select on all external tables in database {db_name} to role {role}",
                    f"grant select on future external tables in database {db_name} to role {role}",
                    f"grant select on all views in database {db_name} to role {role}",
                    f"grant select on future views in database {db_name} to role {role}",
                ])
            else:
                sqls.extend([
                    f"grant references on all tables in database {db_name} to role {role}",
                    f"grant references on future tables in database {db_name} to role {role}",
                    f"grant references on all external tables in database {db_name} to role {role}",
                    f"grant references on future external tables in database {db_name} to role {role}",
                    f"grant references on all views in database {db_name} to role {role}",
                    f"grant references on future views in database {db_name} to role {role}",
                ])
        if self.config.username is not None:
            sqls.append(f"grant role {role} to user {self.config.username}")

        # Lineage queries read the shared 'snowflake' database.
        if self.config.include_table_lineage or self.config.include_view_lineage:
            sqls.append(
                f"grant imported privileges on database snowflake to role {role}"
            )

        dry_run_str = "[DRY RUN] " if provision_role_block.dry_run else ""
        for sql in sqls:
            logger.info(f"{dry_run_str} Attempting to run sql {sql}")
            if provision_role_block.dry_run:
                continue
            try:
                engine.execute(sql)
            except Exception as e:
                # Record the failure and keep executing remaining grants.
                self.error(logger, "role-grant", f"Exception: {e}")

        self.report.provision_role_success = not provision_role_block.dry_run
Esempio n. 31
0
def surname():
    """Pick one surname uniformly at random from the ``surname`` table."""
    sql = ("SELECT name FROM surname "
           "ORDER BY random() LIMIT 1;")
    first_row = engine.execute(sql).fetchall()[0]
    return AttrDict(dict(first_row))
Esempio n. 32
0
def career():
    """Return one randomly selected career name from the ``careers`` table."""
    rows = engine.execute(
        "SELECT name FROM careers ORDER BY random() LIMIT 1;"
    ).fetchall()
    return AttrDict(dict(rows[0]))
Esempio n. 33
0
            deletehash = response.body['data']['deletehash']

            query_string = '''UPDATE {0}_mod set imgur_id = %s, imgur_image_url = %s, imgur_deletehash = %s WHERE local_thumbnail = %s'''.format(board)
            engine.execute(query_string, imgur_image_id, imgur_image_url, deletehash, image_name)
        except Exception as e:
            print e
            pass









for board in boards:
    # Walk every moderation row and upload images that were approved but
    # have not yet been mirrored to imgur.
    for img in engine.execute("SELECT * FROM {0}_mod".format(board)):
        is_approved = img['moderated'] == 1 and img['approved'] == 1
        needs_upload = (img['imgur_thumbnail_url'] is None
                        and img['local_thumbnail'] is not None)
        if is_approved and needs_upload:
            upload(img['local_thumbnail'], board)

        # delete()

    # Runs through entire database for any deletables
    # delete()
Esempio n. 34
0
def company_name():
    """Fetch a single random company name from the ``companies`` table."""
    sql = "SELECT name FROM companies ORDER BY random() LIMIT 1;"
    first_row = engine.execute(sql).fetchall()[0]
    return AttrDict(dict(first_row))
Esempio n. 35
0
    def _populate_view_downstream_lineage(
            self, engine: sqlalchemy.engine.Engine) -> None:
        # NOTE: This query captures both the upstream and downstream table lineage for views.
        # We need this query to populate the downstream lineage of a view,
        # as well as to delete the false direct edges between the upstream and downstream tables of a view.
        # See https://docs.snowflake.com/en/sql-reference/account-usage/access_history.html#usage-notes for current limitations on capturing the lineage for views.
        # Eg: For viewA->viewB->ViewC->TableD, snowflake does not yet log intermediate view logs, resulting in only the viewA->TableD edge.
        view_lineage_query: str = """
WITH view_lineage_history AS (
  SELECT
    vu.value : "objectName" AS view_name,
    vu.value : "objectDomain" AS view_domain,
    vu.value : "columns" AS view_columns,
    r.value : "objectName" AS upstream_table_name,
    r.value : "objectDomain" AS upstream_table_domain,
    r.value : "columns" AS upstream_table_columns,
    w.value : "objectName" AS downstream_table_name,
    w.value : "objectDomain" AS downstream_table_domain,
    w.value : "columns" AS downstream_table_columns,
    t.query_start_time AS query_start_time
  FROM
    (
      SELECT
        *
      FROM
        snowflake.account_usage.access_history
    ) t,
    lateral flatten(input => t.DIRECT_OBJECTS_ACCESSED) vu,
    lateral flatten(input => t.BASE_OBJECTS_ACCESSED) r,
    lateral flatten(input => t.OBJECTS_MODIFIED) w
  WHERE
    vu.value : "objectId" IS NOT NULL
    AND r.value : "objectId" IS NOT NULL
    AND w.value : "objectId" IS NOT NULL
    AND t.query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
    AND t.query_start_time < to_timestamp_ltz({end_time_millis}, 3)
)
SELECT
  view_name,
  view_columns,
  upstream_table_name,
  upstream_table_domain,
  upstream_table_columns,
  downstream_table_name,
  downstream_table_domain,
  downstream_table_columns
FROM
  view_lineage_history
WHERE
  view_domain in ('View', 'Materialized view')
  AND view_name != upstream_table_name
  AND upstream_table_name != downstream_table_name
  AND view_name != downstream_table_name
  QUALIFY ROW_NUMBER() OVER (
    PARTITION BY view_name,
    upstream_table_name,
    downstream_table_name
    ORDER BY
      query_start_time DESC
  ) = 1
        """.format(
            start_time_millis=int(self.config.start_time.timestamp() * 1000),
            end_time_millis=int(self.config.end_time.timestamp() * 1000),
        )

        assert self._lineage_map is not None
        num_edges: int = 0
        num_false_edges: int = 0

        try:
            for db_row in engine.execute(view_lineage_query):
                # We get two edges here.
                # (1) False UpstreamTable->Downstream table that will be deleted.
                # (2) View->DownstreamTable that will be added.

                view_name: str = db_row[0].lower().replace('"', "")
                upstream_table: str = db_row[2].lower().replace('"', "")
                downstream_table: str = db_row[5].lower().replace('"', "")
                # (1) Delete false direct edge between upstream_table and downstream_table
                prior_edges: List[Tuple[
                    str, str, str]] = self._lineage_map[downstream_table]
                self._lineage_map[downstream_table] = [
                    entry for entry in self._lineage_map[downstream_table]
                    if entry[0] != upstream_table
                ]
                for false_edge in set(prior_edges) - set(
                        self._lineage_map[downstream_table]):
                    logger.debug(
                        f"False Table->Table edge removed: Lineage[Table(Down)={downstream_table}]:Table(Up)={false_edge}."
                    )
                    num_false_edges += 1

                # (2) Add view->downstream table lineage.
                self._lineage_map[downstream_table].append(
                    # (<upstream_view_name>, <json_list_of_upstream_view_columns>, <json_list_of_downstream_columns>)
                    (view_name, db_row[1], db_row[7]))
                logger.debug(
                    f"View->Table: Lineage[Table(Down)={downstream_table}]:View(Up)={self._lineage_map[downstream_table]}, downstream_domain={db_row[6]}"
                )
                num_edges += 1

        except Exception as e:
            logger.warning(
                f"Extracting the view lineage from Snowflake failed."
                f"Please check your permissions. Continuing...\nError was {e}."
            )
        logger.info(
            f"Found {num_edges} View->Table edges. Removed {num_false_edges} false Table->Table edges."
        )
        self.report.num_view_to_table_edges_scanned = num_edges
Esempio n. 36
0
    def _populate_view_downstream_lineage(
            self, engine: sqlalchemy.engine.Engine) -> None:
        # This query captures the downstream table lineage for views.
        # See https://docs.snowflake.com/en/sql-reference/account-usage/access_history.html#usage-notes for current limitations on capturing the lineage for views.
        # Eg: For viewA->viewB->ViewC->TableD, snowflake does not yet log intermediate view logs, resulting in only the viewA->TableD edge.
        view_lineage_query: str = """
WITH view_lineage_history AS (
  SELECT
    vu.value : "objectName" AS view_name,
    vu.value : "objectDomain" AS view_domain,
    vu.value : "columns" AS view_columns,
    w.value : "objectName" AS downstream_table_name,
    w.value : "objectDomain" AS downstream_table_domain,
    w.value : "columns" AS downstream_table_columns,
    t.query_start_time AS query_start_time
  FROM
    (
      SELECT
        *
      FROM
        snowflake.account_usage.access_history
    ) t,
    lateral flatten(input => t.DIRECT_OBJECTS_ACCESSED) vu,
    lateral flatten(input => t.OBJECTS_MODIFIED) w
  WHERE
    vu.value : "objectId" IS NOT NULL
    AND w.value : "objectId" IS NOT NULL
    AND w.value : "objectName" NOT LIKE '%.GE_TMP_%'
    AND w.value : "objectName" NOT LIKE '%.GE_TEMP_%'
    AND t.query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
    AND t.query_start_time < to_timestamp_ltz({end_time_millis}, 3)
)
SELECT
  view_name,
  view_columns,
  downstream_table_name,
  downstream_table_columns
FROM
  view_lineage_history
WHERE
  view_domain in ('View', 'Materialized view')
  QUALIFY ROW_NUMBER() OVER (
    PARTITION BY view_name,
    downstream_table_name
    ORDER BY
      query_start_time DESC
  ) = 1
        """.format(
            start_time_millis=int(self.config.start_time.timestamp() * 1000)
            if not self.config.ignore_start_time_lineage else 0,
            end_time_millis=int(self.config.end_time.timestamp() * 1000),
        )

        assert self._lineage_map is not None
        self.report.num_view_to_table_edges_scanned = 0

        try:
            db_rows = engine.execute(view_lineage_query)
        except Exception as e:
            self.warn(
                logger,
                "view_downstream_lineage",
                f"Extracting the view lineage from Snowflake failed."
                f"Please check your permissions. Continuing...\nError was {e}.",
            )
        else:
            for db_row in db_rows:
                view_name: str = db_row["view_name"].lower().replace('"', "")
                downstream_table: str = (
                    db_row["downstream_table_name"].lower().replace('"', ""))
                # Capture view->downstream table lineage.
                self._lineage_map[downstream_table].append(
                    # (<upstream_view_name>, <json_list_of_upstream_view_columns>, <json_list_of_downstream_columns>)
                    (
                        view_name,
                        db_row["view_columns"],
                        db_row["downstream_table_columns"],
                    ))
                self.report.num_view_to_table_edges_scanned += 1

                logger.debug(
                    f"View->Table: Lineage[Table(Down)={downstream_table}]:View(Up)={self._lineage_map[downstream_table]}"
                )

        logger.info(
            f"Found {self.report.num_view_to_table_edges_scanned} View->Table edges."
        )
# print header_html
# thread = ""
# for i in header_html:
#     print i
#     print str(i[0])
#     thread += str(i[0])
#
# thread_to_generate = engine.execute("SELECT post_message FROM sci WHERE threadID = '{}'".format(thread_num))
# for i in thread_to_generate:
#     thread += i[0]





thread_number = engine.execute("SELECT DISTINCT threadID FROM sci ORDER BY threadID DESC")

# Build one HTML page per distinct thread: header first, then every post
# in position order.
for i in thread_number:
    thread_num = i[0]
    posts = engine.execute("SELECT position_in_thread, post_message FROM sci WHERE threadID = {} ORDER BY position_in_thread ASC".format(i[0]))
    thread = ""

    # NOTE(review): this inner loop reuses the name `i`, shadowing the outer
    # cursor variable. It happens to be harmless because thread_num and posts
    # were captured above, but it deserves a rename.
    header_html = engine.execute("SELECT header_html FROM header WHERE boardname = 'sci'")
    for i in header_html:
        thread += i[0]

    for j in posts:
        # print j[1]
        thread += j[1]

    # NOTE(review): `board` is presumably defined earlier in the original
    # script — verify against the full file. The handle is also never closed
    # (or written) in the visible code; confirm the write/close follows.
    file = open('/var/www/{}/{}.html'.format(board,thread_num), 'w')
Esempio n. 38
0
def upload(image_name, board):
    # Upload the thumbnail and full image for a moderation row to imgur
    # (via the Mashape proxy), then record the returned ids/urls/deletehashes
    # back into the {board}_mod table.
    # NOTE(review): Python 2 code (print statements); API credentials are
    # hard-coded below and should be moved to configuration.
    # This will upload the file
    query_string = '''SELECT * FROM {0}_mod WHERE local_thumbnail = %s'''.format(board)
    row = engine.execute(query_string, image_name)



    for item in row:
        # necessary variables
        threadID = item['threadID']
        thumbnail = item['local_thumbnail']
        image = item['local_image']


        # Paths are relative to this script's directory: /<board>/<thread>/<file>.
        directory_thumb = '/' + board + '/' + threadID + '/' + thumbnail
        directory_image = '/' + board + '/' + threadID + '/' + image



        """
        Upload the thumbnail
        """
        try:
            response = unirest.post("https://imgur-apiv3.p.mashape.com/3/image",
                                    headers={
                                        "X-Mashape-Key": "y4mVnnNiZBmshBb9s7rh4DSw6K53p1T5SDujsnxxEUEvw62m4L",
                                        "Authorization": "Client-ID 999ee789e084f2e",
                                        "Content-Type": "application/x-www-form-urlencoded",
                                        "Accept": "application/json"
                                    },
                                    params={
                                        "image": open(os.path.dirname(os.path.realpath(__file__)) + directory_thumb,
                                                      mode='r')
                                    }
                                    )
            #
            print response.body['data']['id']
            print response.body['data']['link']
            print response.body['data']['deletehash']
            #
            try:
                image_thumbnail_id = response.body['data']['id']
                imgur_thumbnail_url = response.body['data']['link']
                deletehash = response.body['data']['deletehash']

                # Persist the imgur thumbnail metadata for this row.
                query_string = '''UPDATE {0}_mod set imgur_thumbnail_id = %s, imgur_thumbnail_url = %s, imgur_thumbnail_deletehash = %s WHERE local_thumbnail = %s'''.format(board)
                engine.execute(query_string, image_thumbnail_id, imgur_thumbnail_url, deletehash, image_name)
            except Exception as e:
                print e
                pass

        # NOTE(review): bare except silently swallows any upload failure,
        # including missing local files — consider logging the exception.
        except:
            pass

        """
        Upload the main image
        """

        try:
            response = unirest.post("https://imgur-apiv3.p.mashape.com/3/image",
                                    headers={
                                        "X-Mashape-Key": "y4mVnnNiZBmshBb9s7rh4DSw6K53p1T5SDujsnxxEUEvw62m4L",
                                        "Authorization": "Client-ID 999ee789e084f2e",
                                        "Content-Type": "application/x-www-form-urlencoded",
                                        "Accept": "application/json"
                                    },
                                    params={
                                        "image": open(os.path.dirname(os.path.realpath(__file__)) + directory_image,
                                                      mode='r')
                                    }
                                    )
            #
            print response.body['data']['id']
            print response.body['data']['link']
            print response.body['data']['deletehash']
            #
            # try:
            imgur_image_id = response.body['data']['id']
            imgur_image_url = response.body['data']['link']
            deletehash = response.body['data']['deletehash']

            # Persist the imgur full-image metadata for this row.
            query_string = '''UPDATE {0}_mod set imgur_id = %s, imgur_image_url = %s, imgur_deletehash = %s WHERE local_thumbnail = %s'''.format(board)
            engine.execute(query_string, imgur_image_id, imgur_image_url, deletehash, image_name)
        except Exception as e:
            print e
            pass
Esempio n. 39
0
def unit_type():
    """Return a random US unit type row (``name`` and ``abbreviation``)."""
    sql = ("SELECT name, abbreviation FROM us_unit_types "
           "ORDER BY random() LIMIT 1;")
    first_row = engine.execute(sql).fetchall()[0]
    return AttrDict(dict(first_row))
Esempio n. 40
0

# boards = ['a','c','w','m','cgl','cm','n','jp','v','vg','vp','vr','co','g','tv','k',
#           'o','an','tg','sp','asp','sci','his','int','out','toy','i','po','p','ck','ic','wg','mu','fa',
#           '3','gd','diy','wsg','qst','biz','fit','x','lit','adv','lgbt','mlp','news','wsr','b','r9k','pol','soc','s4s',
#           's','hc','hm','h','e','u','d','y','t','hr','gif','aco','r']

boards = ['b','sci','s']


# For each board: refresh the catalog ranking, then fetch every thread
# flagged for botting and pull down its page with PhantomJS.
for board in boards:

    catalog_ranker_check.check_catalog(board)

    threads_that_need_to_be_botted = engine.execute("SELECT threadID FROM {}_catalog WHERE bot = 1 LIMIT 100".format(board))
    for urls in threads_that_need_to_be_botted:

        # Ensure a local directory exists for this thread's assets.
        if not os.path.exists(board + '/' + urls[0]):
            os.makedirs(board + '/' + urls[0])

        browser = webdriver.PhantomJS(executable_path='/root/anaconda2/bin/phantomjs') # or add to your PATH
        # BUG FIX: the page-load timeout must be configured BEFORE get();
        # previously it was set afterwards and had no effect on the load
        # it was meant to bound.
        browser.set_page_load_timeout(3000)
        browser.get('http://boards.4chan.org/{}/thread/{}/'.format(board, urls[0]))

        response = browser.page_source
Esempio n. 41
0
def create_vendors():
    """Reset the demo database and render the vendor listing page.

    Drops the legacy ``Vendors`` table via raw SQL, rebuilds every mapped
    table, seeds demo users, posts and vendors, and finally renders
    ``vendors.html`` with the full vendor list.
    """
    engine = create_engine(os.getenv('DATABASE_URL'))
    result = engine.execute('DROP TABLE IF EXISTS Vendors;')

    db.drop_all()
    db.create_all()

    # Demo users (placeholder credentials).
    u1 = User(username="******", email="*****@*****.**", password="******")
    u2 = User(username="******", email="*****@*****.**", password="******")
    db.session.add(u1)
    db.session.add(u2)
    db.session.commit()

    # One demo post per user; user ids are available after the commit above.
    db.session.add(Post(title="my book", content="always true", user_id=u1.id))
    db.session.add(Post(title="helen book", content="more true", user_id=u2.id))
    db.session.commit()

    # Seed vendors: one fully populated estate vendor plus three minimal
    # person-type vendors.
    demo_vendors = [
        Vendors(vendor_active="y",
                vendor_type="e",
                vendor_company_name="Tiger Estates",
                vendor_contact_name="Sam Houston",
                vendor_email="*****@*****.**",
                vendor_mobile="07424454567",
                vendor_fax="01793 3224598",
                vendor_address_house_name="Villa Rosa",
                vendor_address_house_number="433a",
                vendor_address_line_1="Central Drive",
                vendor_address_line_2="South Shore",
                vendor_address_town="Blackpool",
                vendor_address_city="Not Applicapble",
                vendor_address_county="Lancashire",
                vendor_address_post_code="FY1 6LD",
                vendor_address_country="England",
                vendor_address_latitude="53.34",
                vendor_address_longitude="-1.456"),
        Vendors(vendor_active="n",
                vendor_type="p",
                vendor_company_name="Not Relevant",
                vendor_contact_name="Jade Menham",
                vendor_email="*****@*****.**"),
        Vendors(vendor_active="y",
                vendor_type="p",
                vendor_company_name="Not Relevant",
                vendor_contact_name="Helen Menham",
                vendor_email="*****@*****.**"),
        Vendors(vendor_active="y",
                vendor_type="p",
                vendor_company_name="Not Relevant",
                vendor_contact_name="Mario E Wakeham",
                vendor_email="*****@*****.**"),
    ]
    for vendor in demo_vendors:
        db.session.add(vendor)
    db.session.commit()

    # An argument-less filter_by() yields a query over every vendor row.
    vendors = Vendors.query.filter_by()

    return render_template('vendors.html', vendors=vendors)
Esempio n. 42
0

# boards = ['a','c','w','m','cgl','cm','n','jp','v','vg','vp','vr','co','g','tv','k',
#           'o','an','tg','sp','asp','sci','his','int','out','toy','i','po','p','ck','ic','wg','mu','fa',
#           '3','gd','diy','wsg','qst','biz','fit','x','lit','adv','lgbt','mlp','news','wsr','b','r9k','pol','soc','s4s',
#           's','hc','hm','h','e','u','d','y','t','hr','gif','aco','r']

boards = ['asp','sci','his','int','out','toy','i','po','p','ck','ic','wg','mu','fa']


# For each board: refresh the catalog ranking, then fetch every thread
# flagged for botting and pull down its page with PhantomJS.
for board in boards:

    catalog_ranker_check.check_catalog(board)

    threads_that_need_to_be_botted = engine.execute("SELECT threadID FROM {}_catalog WHERE bot = 1".format(board))
    for urls in threads_that_need_to_be_botted:

        # Ensure a local directory exists for this thread's assets.
        if not os.path.exists(board + '/' + urls[0]):
            os.makedirs(board + '/' + urls[0])

        browser = webdriver.PhantomJS(executable_path='/root/anaconda2/bin/phantomjs') # or add to your PATH
        # BUG FIX: the page-load timeout must be configured BEFORE get();
        # previously it was set afterwards and had no effect on the load
        # it was meant to bound.
        browser.set_page_load_timeout(3000)
        browser.get('http://boards.4chan.org/{}/thread/{}/'.format(board, urls[0]))

        response = browser.page_source
Esempio n. 43
0
    def _populate_external_lineage(self) -> None:
        """Populate external-location -> table lineage edges.

        Two sources are combined:
        1. access_history rows where a table was populated from an external
           location (e.g. a COPY INTO from S3).
        2. Explicitly created external tables, discovered via
           ``show external tables`` (Snowflake does not log these to
           access_history).
        """
        engine = self.get_metadata_engine(database=None)
        # Handles the case where a table is populated from an external location via copy.
        # Eg: copy into category_english from 's3://acryl-snow-demo-olist/olist_raw_data/category_english'credentials=(aws_key_id='...' aws_secret_key='...')  pattern='.*.csv';
        query: str = """
    WITH external_table_lineage_history AS (
        SELECT
            r.value:"locations" as upstream_locations,
            w.value:"objectName" AS downstream_table_name,
            w.value:"objectDomain" AS downstream_table_domain,
            w.value:"columns" AS downstream_table_columns,
            t.query_start_time AS query_start_time
        FROM
            (SELECT * from snowflake.account_usage.access_history) t,
            lateral flatten(input => t.BASE_OBJECTS_ACCESSED) r,
            lateral flatten(input => t.OBJECTS_MODIFIED) w
        WHERE r.value:"locations" IS NOT NULL
        AND w.value:"objectId" IS NOT NULL
        AND t.query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
        AND t.query_start_time < to_timestamp_ltz({end_time_millis}, 3))
    SELECT upstream_locations, downstream_table_name, downstream_table_columns
    FROM external_table_lineage_history
    WHERE downstream_table_domain = 'Table'
    QUALIFY ROW_NUMBER() OVER (PARTITION BY downstream_table_name ORDER BY query_start_time DESC) = 1""".format(
            start_time_millis=int(self.config.start_time.timestamp() * 1000)
            if not self.config.ignore_start_time_lineage else 0,
            end_time_millis=int(self.config.end_time.timestamp() * 1000),
        )

        num_edges: int = 0
        self._external_lineage_map = defaultdict(set)
        try:
            for db_row in engine.execute(query):
                # key is the down-stream table name
                key: str = db_row[1].lower().replace('"', "")
                # upstream_locations is a JSON array of external locations.
                self._external_lineage_map[key] |= {*json.loads(db_row[0])}
                logger.debug(
                    f"ExternalLineage[Table(Down)={key}]:External(Up)={self._external_lineage_map[key]}"
                )
        except Exception as e:
            # BUG FIX: corrected "premissions" typo and the missing space that
            # produced "failed.Please" in the log output.
            logger.warning(
                "Populating table external lineage from Snowflake failed. "
                f"Please check your permissions. Continuing...\nError was {e}."
            )
        # Handles the case for explicitly created external tables.
        # NOTE: Snowflake does not log this information to the access_history table.
        # NOTE(review): num_edges only counts edges found here, not the
        # copy-based edges from the loop above — confirm that is intended.
        external_tables_query: str = "show external tables"
        try:
            for db_row in engine.execute(external_tables_query):
                key = (
                    f"{db_row.database_name}.{db_row.schema_name}.{db_row.name}"
                    .lower())
                self._external_lineage_map[key].add(db_row.location)
                logger.debug(
                    f"ExternalLineage[Table(Down)={key}]:External(Up)={self._external_lineage_map[key]}"
                )
                num_edges += 1
        except Exception as e:
            # BUG FIX: corrected "premissions" typo and the missing space that
            # produced "failed.Please" in the warning text.
            self.warn(
                logger,
                "external_lineage",
                "Populating external table lineage from Snowflake failed. "
                f"Please check your permissions. Continuing...\nError was {e}.",
            )
        logger.info(f"Found {num_edges} external lineage edges.")
        self.report.num_external_table_edges_scanned = num_edges
Esempio n. 44
0

# print response.code
# print response.headers
# print response.body
# print response.raw_body

# 3nNpeJh


'''
Delete images
'''

# to_delete = ["692LfMR1HKotuic","nbxl2ZAcOTO2eoz", "nrsNcrYqUV0A9rH"]
# Delete every imgur upload whose deletehash is recorded in the `imgur` table.
# NOTE(review): Python 2 script; the API credentials below are hard-coded and
# should live in configuration. The final `if` body is missing in this dump —
# presumably it deleted the matching DB row; confirm against the full file.
to_delete = engine.execute("SELECT deletehash FROM imgur")
for img in to_delete:
  # client.delete_image(img)
# # These code snippets use an open-source library.
  # These code snippets use an open-source library. http://unirest.io/python
  response = unirest.delete("https://imgur-apiv3.p.mashape.com/3/image/{}".format(img[0]),
    headers={
      "X-Mashape-Key": "huYA3ztRaxmshy95Mcj4dTmVrMTHp1iQ858jsn3jpASEst4dig",
      "Authorization": "Client-ID 67d854ceaa5af4c",
      "Accept": "text/plain"
    }
  )
  print response.body['status']

  if response.body['status'] == 200:
    # engine.execute("DELETE FROM imgur WHERE deletehash = {}".format(img[0]))
Esempio n. 45
0
def create_tables(app):
    """Ensure the ``profesores`` schema and all mapped tables exist.

    Returns the engine bound to the app's configured database URI.
    """
    engine = create_engine(app.config['SQLALCHEMY_DATABASE_URI'])
    schema_exists = engine.dialect.has_schema(engine, 'profesores')
    if not schema_exists:
        engine.execute(sqlalchemy.schema.CreateSchema('profesores'))
    db.metadata.create_all(engine)
    return engine
Esempio n. 46
0

base_url = 'http://boards.4chan.org'

# NOTE(review): database credentials are hard-coded here — move them to
# configuration/environment variables.
engine = create_engine('mysql+pymysql://root:Supermon12'
                                   '@localhost/chanarchive')


boards = ['sci']


for board in boards:

    # DELETES any duplicate entries (same thread + position, higher id).
    query = '''DELETE n1 FROM {0}_mod n1, {0}_mod n2 WHERE n1.id > n2.id AND n1.threadID = n2.threadID AND n1.position_in_thread = n2.position_in_thread'''.format(board)
    result = engine.execute(query)

    # Debug check: count posts for one hard-coded thread.
    query = '''SELECT count(position_in_thread) FROM {0}_mod WHERE threadID = 8189766'''.format(board)
    result = engine.execute(query)

    for i in result:
        print i[0]

    query = '''SELECT post_message FROM {0}_mod WHERE threadID = 8180660'''.format(board)
    result = engine.execute(query)
    # #
    # for i in result:
    #     soup = BeautifulSoup(str(i[0]), 'lxml')
    #     item = soup.find_all('div')
    #     for i in item:
    #         print str(item) + '\n\n\n'
def check_catalog(board):
    # Scrape a board's 4chan catalog page with PhantomJS and sync the
    # {board}_catalog table: mark stale rows, record each thread's current
    # rank, flag rising threads for botting, and purge removed threads.
    # NOTE(review): Python 2 code; SQL is built with str.format (no
    # parameterization) and the DB password is hard-coded below.
    boards = [board]

    for board in boards:


        browser = webdriver.PhantomJS(executable_path='/root/anaconda2/bin/phantomjs') # or add to your PATH
        browser.get("http://boards.4chan.org/{}/catalog".format(board))
        browser.set_page_load_timeout(3000)


        # element = browser.find_element_by_name('html')
        response = browser.page_source



        print "Hello everyone!"


        engine = create_engine('mysql+pymysql://root:Supermon12'
                                   '@localhost/chanarchive')

        soup = BeautifulSoup(response.encode('utf-8'), "lxml")
        # print soup
        # print "This is soup:" + str(soup)

        links =soup.find_all("div", attrs={"class": "thread"})
        # print str(links)
        # print len(links)
        # for l in links:
        #     print l
        soup = BeautifulSoup(str(links), "lxml")
        links = soup.find_all("div", attrs={"class": "thread"})
        # print links
        # print len(links)
        threadList = []

        rank = 1 #Set initial rank to 1 for counting
        # Reset bookkeeping columns before re-ranking: assume every row is
        # removable until the thread is seen again in the fresh catalog.
        engine.execute("UPDATE {}_catalog SET remove = 1 WHERE remove = 0".format(board))
        engine.execute("UPDATE {}_catalog SET bot = 0 WHERE bot = 1".format(board))
        engine.execute("UPDATE {}_catalog SET previous_position = current_position".format(board))
        engine.execute("UPDATE {}_catalog SET current_position = 255".format(board))

        # Thread ids are parsed from div ids of the form "thread-<id>".
        for i in links:
            threadList.append(i['id'].split('-')[1])
            thread_num = i['id'].split('-')[1]


            # creat a for each loop of all parsed threads
            thread_in_db = engine.execute("SELECT IF ( EXISTS( SELECT * FROM {}_catalog WHERE threadID = '{}'), 1, 0)".format(board,thread_num))
            # items = engine.execute("SELECT remove FROM b_catalog")
            # print len(items)
            # NOTE(review): this inner loop shadows the outer `i`.
            for i in thread_in_db:
                if i[0]: #If this is true 1
                     # update rank
                    # engine.execute("UPDATE {}_catalog SET previous_position = current_position".format(board))
                    engine.execute("UPDATE {}_catalog SET current_position = {}, remove = 0 WHERE threadID = {}".format(board,rank, thread_num))
                    print "It exists!"
                else:
                    engine.execute("INSERT INTO {}_catalog (threadID,current_position, remove) VALUES ({}, {}, {})".format(board,thread_num, rank, 1))

                    print "404!"


            # Update rank
            rank += 1

        # engine.execute("DELETE from {}_catalog WHERE remove = 1".format(board))

        # Threads that climbed the catalog get flagged for botting; rows no
        # longer present in the catalog are deleted.
        engine.execute("UPDATE {}_catalog SET bot = 1 WHERE current_position < previous_position".format(board))
        engine.execute("DELETE FROM {}_catalog WHERE remove = 1 AND bot = 0 AND current_position = 255".format(board))
        print rank
This creates an html index of all the files

'''


engine = create_engine('mysql+pymysql://root:magical18'
                               '@localhost/chanarive')




thread = "8166911"
board = 'sci'

# Get the header html
header_html = engine.execute("SELECT header_html FROM header WHERE boardname = 'index'")
for i in header_html:
    thread += i[0] # this only adds the header once

# Gets the distinct threads
thread_number = engine.execute("SELECT DISTINCT threadID FROM sci ORDER BY threadID DESC")

# Goes through each seperate post
for num in thread_number:
    thread_num = num[0]

    count_in_thread = engine.execute("SELECT COUNT(*) FROM sci WHERE threadID = {}".format(num[0]))
    count_pos = 0
    for i in count_in_thread:
        count_pos = i[0]