Example #1
def freeze_full_database(db_path, prefix, filename='full_dataset_frozen.json'):
    '''
    Create one frozen json file for all data samples in .db file located at db_path.

    Saves json as flat list of individual records


    Parameters
    ----------
    db_path : str
        Absolute path to source db file. e.g. /media/data/pyleaves/leavesdb/resources/leavesdb.db
    prefix : str
        Directory in which to save the frozen json file.

    Returns
    -------
    frozen_json_filepath : dict({str:str})

    '''
    db_URI = f'sqlite:///{db_path}'
    db = dataset.connect(db_URI)
    table = db['dataset']

    # dataset_rows = list(table.distinct('id'))
    dataset_rows = list(table.all(order_by='id'))

    datafreeze.freeze(dataset_rows,
                      mode='list',
                      format='json',
                      filename=filename,
                      prefix=prefix)
    frozen_json_filepath = {'full_dataset': os.path.join(prefix, filename)}

    return frozen_json_filepath
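A brief usage sketch for the function above (the db path matches the docstring example; the call itself and the output directory are hypothetical, and the prefix directory is assumed to exist):

# Hypothetical call; writes resources/full_dataset_frozen.json
frozen = freeze_full_database(
    db_path='/media/data/pyleaves/leavesdb/resources/leavesdb.db',
    prefix='resources')
print(frozen['full_dataset'])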
Example #2
def freeze_if_not_empty(items, path):
    force_eval = list(items)
    if force_eval:
        datafreeze.freeze(force_eval, format="json", filename=path)
    else:
        with open(path, "w") as f:
            f.write("{}\n")
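A hedged usage sketch: an empty iterable takes the fallback branch and writes a bare JSON object, while a non-empty one is handed to datafreeze (paths are hypothetical):

freeze_if_not_empty(iter([]), 'empty.json')        # falls back to writing "{}\n"
freeze_if_not_empty([{'id': 1}], 'records.json')   # frozen via datafreeze.freeze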
Example #3
def dict2json(data, prefix, filename):
    '''
    Convert a list of dicts into proper json format for constructing a sql db

    Arguments:
        data, list(dict):
            e.g. [{'id':1,'path':...,'family':...}, {'id':2,...}]
        prefix, str:
            Directory in which to save file
        filename, str:
            Name of file in which to save json

    Return:
        json_output, dict:
            {'count': len(data), 'results': data, 'meta': []}
    '''
    data_count = len(data)
    json_output = {"count": data_count, "results": data, "meta": []}

    datafreeze.freeze(data,
                      mode='list',
                      format='json',
                      filename=filename,
                      prefix=prefix)

    return json_output
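A small usage sketch with made-up records; the prefix directory and filename are placeholders (the prefix directory is assumed to exist):

records = [{'id': 1, 'path': '/data/a.jpg', 'family': 'Fagaceae'},
           {'id': 2, 'path': '/data/b.jpg', 'family': 'Pinaceae'}]
meta = dict2json(records, prefix='resources', filename='records_frozen.json')
print(meta['count'])  # 2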
Example #4
 def processFiles(self):
   # loop through each file in the input
   for full_path in self.full_paths:
     file_name = os.path.basename(full_path)
     print('Processing {:}...'.format(full_path))
     self.verbose and analyzer.printContents(full_path)
     try:
       self.processOneFile(full_path)
      except Exception:
        print("WARNING: Failed to process {:}".format(file_name))
     self.drawPlots and plt.show()
   print("Done processing all files.")
   
   # dump the results to a csv file if the user asked for it
   if self.freezeObj is not None:
     if self.full_paths == []:  # we were called without processing new files
       if len(self.db.tables) == 1:
         tableName = self.db.tables[0]
       else:
         print('The tables available in the database are:')
         print(self.db.tables)
         tableName = input("Type one here --> ")
       self.t = self.db.create_table(tableName, primary_id='int_hash', primary_type=self.db.types.bigint)
     result = self.t.all()
     datafreeze.freeze(result, format='csv', fileobj=self.freezeObj)
     print('Sooo cold! Data frozen to {:}'.format(self.freezeObj.name))
     self.freezeObj.close()
Example #5
def main():

    #continuously update the tweets.csv file
    while True:

        #dump the database into the csv file

        database = dataset.connect("sqlite:///tweets.db")

        result = database["tweets"].all()

        #tweets.db can not be empty at this point!
        freeze(result, format='csv', filename="tweets.csv")

        #analyze each line in the csv file
        csvReader()

        avg_city_scores = {}

        for key, value in city_scores.items():
            avg_city_scores[key] = sum(value) / float(len(value))
        print("###########################################")
        print(avg_city_scores)

        city_scores.clear()
        #erase the database tweets table
        table = database["tweets"]
        table.delete()

        try:
            os.remove("tweets.csv")
        except OSError:
            pass
        #wait 30 seconds, then redo the above code
        time.sleep(30)
Example #6
def main(args, arduino):

    db = dataset.connect()
    table = db['logs']

    try:
        while True:
            (sensor, avg_distance) = sonic_averages(args, arduino)

            # Really hacky solution, needs to change
            try:
                prev_obj = music(sensor, args, avg_distance, prev_obj)
            except NameError:  # first pass: prev_obj does not exist yet
                prev_obj = music(sensor, args, avg_distance)

            table.insert(
                dict(time=int(time.time()), sensor=sensor, value=avg_distance))
            datafreeze.freeze(db['logs'].all(),
                              format='csv',
                              filename='logs.csv')

    except KeyboardInterrupt:  # Closes the port and updates the database if there is a keyboard interrupt
        if args.save:
            datafreeze.freeze(db['logs'].all(),
                              format='csv',
                              filename='logs.csv')

        arduino.close()  # Should close the serial port (hopefully)
Example #7
def cleanup_db():
    print('EXITING.')
    print('FREEZING DB...')
    result = db['tweets'].all()
    freeze(result, format='csv', filename='output/' + db_file + '.csv')
    # os.walk yields bare filenames, so compare the basename against the output dir
    for root, dirs, files in os.walk(os.getcwd()):
        if os.path.basename(root) == 'output' and db_file + '.csv' in files:
            print('DB FROZEN. {}.csv SAVED'.format(db_file))
Example #8
def export_ctf(segments=None):
    db = dataset.connect(get_config('SQLALCHEMY_DATABASE_URI'))
    if segments is None:
        segments = ['challenges', 'teams', 'both', 'metadata']

    groups = {
        'challenges': [
            'challenges',
            'files',
            'tags',
            'keys',
            'hints',
        ],
        'teams': [
            'teams',
            'tracking',
            'awards',
        ],
        'both': [
            'solves',
            'wrong_keys',
            'unlocks',
        ],
        'metadata': [
            'alembic_version',
            'config',
            'pages',
        ]
    }

    # Backup database
    backup = six.BytesIO()

    backup_zip = zipfile.ZipFile(backup, 'w')

    for segment in segments:
        group = groups[segment]
        for item in group:
            result = db[item].all()
            result_file = six.BytesIO()
            datafreeze.freeze(result, format='ctfd', fileobj=result_file)
            result_file.seek(0)
            backup_zip.writestr('db/{}.json'.format(item), result_file.read())

    # Backup uploads
    upload_folder = os.path.join(os.path.normpath(app.root_path),
                                 get_config('UPLOAD_FOLDER'))
    for root, dirs, files in os.walk(upload_folder):
        for file in files:
            parent_dir = os.path.basename(root)
            backup_zip.write(os.path.join(root, file),
                             arcname=os.path.join('uploads', parent_dir, file))

    backup_zip.close()
    backup.seek(0)
    return backup
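Because export_ctf() returns the archive as an in-memory BytesIO, a caller would typically persist it afterwards, roughly like this (the output filename is hypothetical):

backup = export_ctf(segments=['challenges', 'teams'])
with open('ctfd_export.zip', 'wb') as f:
    f.write(backup.read())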
Example #9
def dumper():
    #dump the database into the csv file
    database = dataset.connect("sqlite:///tweets.db")

    result = database["tweets"].all()

    #tweets.db can not be empty at this point!
    freeze(result, format='csv', filename="tweets.csv")

    table = database["tweets"]
    table.delete()

    #analyze each line in the csv file
    csvReader()

    avg_city_scores = {}

    for key, value in city_scores.items():
        avg_city_scores[key] = (sum(value) / float(len(value)))
    print("###########################################")
    print(avg_city_scores)

    scores = ""

    for city in cities:
        if city in avg_city_scores:
            if avg_city_scores[city] < -0.5:
                s = 0
            elif (-0.5 < avg_city_scores[city] < 0):
                s = 1
            elif (avg_city_scores[city] == 0):
                s = 2
            elif (0 < avg_city_scores[city] < 0.5):
                s = 3
            else:
                s = 4
            scores += str(s)
        else:
            scores += '2'
    print("updated scores.txt")

    with open('scores.txt', 'w') as file:
        file.write(scores)

    city_scores.clear()

    #remove the temporary csv file; the tweets table was already cleared above
    try:
        os.remove("tweets.csv")
    except OSError:
        pass
Example #10
def exportCSV(canidateName):
    print("Export data to csv: " + canidateName + "_" + csvFileName)
    db = dataset.connect(sqlConnectionString)
    result = db[sqlTableName].all()
    newFileName = "data/" + canidateName + "_" + csvFileName + ".csv"
    freeze(result, format='csv', filename=newFileName)

    if canidateName == "B":
        bucket = "bd2020bidentweets"
    else:
        bucket = "bd2020donaldtweets"
    upload_file(newFileName, bucket)
Example #11
def export_ctf():
    # TODO: For some unknown reason dataset is only able to see alembic_version during tests.
    # Even using a real sqlite database. This makes this test impossible to pass in sqlite.
    db = dataset.connect(get_app_config("SQLALCHEMY_DATABASE_URI"))

    # Backup database
    backup = tempfile.NamedTemporaryFile()

    backup_zip = zipfile.ZipFile(backup, "w")

    tables = db.tables
    for table in tables:
        result = db[table].all()
        result_file = six.BytesIO()
        datafreeze.freeze(result, format="kmactf", fileobj=result_file)
        result_file.seek(0)
        backup_zip.writestr("db/{}.json".format(table), result_file.read())

    # Guarantee that alembic_version is saved into the export
    if "alembic_version" not in tables:
        result = {
            "count": 1,
            "results": [{
                "version_num": get_current_revision()
            }],
            "meta": {},
        }
        result_file = six.BytesIO()
        # json.dump writes str, so encode the payload before writing to the BytesIO
        result_file.write(json.dumps(result).encode('utf-8'))
        result_file.seek(0)
        backup_zip.writestr("db/alembic_version.json", result_file.read())

    # Backup uploads
    uploader = get_uploader()
    uploader.sync()

    upload_folder = os.path.join(os.path.normpath(app.root_path),
                                 app.config.get("UPLOAD_FOLDER"))
    for root, dirs, files in os.walk(upload_folder):
        for file in files:
            parent_dir = os.path.basename(root)
            backup_zip.write(
                os.path.join(root, file),
                arcname=os.path.join("uploads", parent_dir, file),
            )

    backup_zip.close()
    backup.seek(0)
    return backup
Example #12
def freeze_db_by_dataset(db_path='resources/leavesdb.db',
                         prefix='resources',
                         freeze_key='dataset'):
    '''
    Create frozen json files for each dataset in .db file located at db_path.

    Saves json as flat list of individual records


    Parameters
    ----------
    db_path : str
        Absolute path to source db file. e.g. /media/data/pyleaves/leavesdb/resources/leavesdb.db
    prefix : str
        Directory in which to save the frozen json files.
    freeze_key : str, optional
        Column used to group rows into separate frozen files. The default is 'dataset'.

    Returns
    -------
    frozen_json_filepaths : dict({str:str})

    '''
    db_URI = f'sqlite:///{db_path}'
    db = dataset.connect(db_URI)
    table = db['dataset']

    dataset_names = table.distinct(freeze_key)

    frozen_json_filepaths = {}
    for dataset_name in dataset_names:
        dataset_name = dataset_name[freeze_key]
        dataset_rows = table.find(dataset=dataset_name)

        filename = f'{dataset_name}_frozen.json'
        datafreeze.freeze(dataset_rows,
                          mode='list',
                          format='json',
                          filename=filename,
                          prefix=prefix)
        frozen_json_filepaths.update(
            {dataset_name: os.path.join(prefix, filename)})

    return frozen_json_filepaths
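Usage sketch for the per-dataset variant above, using its default arguments; the returned dict maps each dataset name to its frozen json path:

frozen_paths = freeze_db_by_dataset()   # reads resources/leavesdb.db by default
for name, path in frozen_paths.items():
    print(name, '->', path)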
Example #13
def freeze_data():
    print("Freezing dimension values...")
    prefix = os.path.join(get_output_dir(), 'data', 'dimensions')
    freeze(value_table.all(), prefix=prefix, filename='{{dimension_name}}.csv', format='csv')
    freeze(value_table.all(), prefix=prefix, filename='{{dimension_name}}.json', format='json')

    print("Freezing cubes...")
    for cube in get_cubes():
        prefix = os.path.join(get_output_dir(), 'data',
                              cube['statistic_name'],
                              cube['cube_name'])
        slug = slugify(cube['statistic_title_de'])
        for (text, rb) in [('labeled', True), ('raw', False)]:
            q, ps = query_cube(cube['cube_name'], readable=rb)
            fn = '%s-%s-%s.csv' % (slug, cube['cube_name'], text)
            print([fn])
            freeze(engine.query(q), prefix=prefix, filename=fn)
Example #14
def export(context, params):
    table = context.datastore[params["table"]]
    datafreeze.freeze(table, format="json", filename=params["filename"])
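A minimal sketch of calling this export step outside its pipeline; the Context stand-in below is hypothetical and only mimics the datastore attribute the function expects:

import dataset
import datafreeze

class Context:                                   # hypothetical stand-in
    datastore = dataset.connect('sqlite:///scraped.db')

export(Context(), {'table': 'results', 'filename': 'results.json'})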
Example #15
import settings
import tweepy
import dataset
from datafreeze import freeze
from textblob import TextBlob

db = dataset.connect(settings.CONNECTION_STRING)

result = db[settings.TABLE_NAME].all()
freeze(result, format='csv', filename='test.csv')
Example #16
import dataset
from datafreeze import freeze

db = dataset.connect("sqlite:///tweets.db")
result = db['myTable'].all()
freeze(result, format='csv', filename='tweets.csv')
Example #17
            print(filename)
            contents = open(filename, "r").read()
            soup = BeautifulSoup(contents)

            wiponumber = soup.find_all('record')
            for c in wiponumber:
                properties['wipenumber'] = c.attrs[u'pn']

            mainclassification = soup.find_all('ipcs')
            for c in mainclassification:
                properties['mainclass'] = c.attrs[u'mc']

            subclasses = soup.find_all('ipc')
            subcls = []
            for c in subclasses:
                subcls.append(c.attrs[u'ic'])
            properties['subclasses'] = '"' + "--//--".join(subcls) + '"'
            properties['title'] = '"' + soup.find('ti').get_text().replace(
                '\n', "").replace('"', "").replace("'", "") + '"'
            properties['abstract'] = '"' + soup.find('ab').get_text().replace(
                '\n', "").replace('"', "").replace("'", "") + '"'
            properties['claims'] = '"' + soup.find('cl').get_text().replace(
                '\n', "").replace('"', "").replace("'", "") + '"'
            properties['description'] = '"' + soup.find('txt').get_text(
            ).replace('\n', "").replace('"', "").replace("'", "") + '"'
            # print(properties)
            # print(table)
            table.insert(properties)

freeze(table, format='csv', filename='WIPO-alpha-train.csv')
Example #18
import logging
import dataset
import datafreeze
from config import CONNECTION_STRING, TABLE_NAME, CSV_NAME

logger = logging.getLogger()

# Export the contents of a SQLite table to CSV:
try:
    db = dataset.connect(CONNECTION_STRING)
    result = db[TABLE_NAME].all()
    datafreeze.freeze(result, format='csv', filename=CSV_NAME)
    logger.info("All data successfully exported to CSV")
except Exception as err:
    logger.error(err)
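Example #18 pulls its settings from a config module; a minimal sketch of what that module might look like (all values are placeholders):

# config.py -- placeholder values only
CONNECTION_STRING = "sqlite:///tweets.db"
TABLE_NAME = "tweets"
CSV_NAME = "tweets.csv"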
Example #19
import tweepy
import dataset
from datafreeze import freeze
from textblob import TextBlob

CONNECTION_STRING = "sqlite:///tweets.db"
CSV_NAME = "data/tweets.csv"
TABLE_NAME = "tweets"

db = dataset.connect(CONNECTION_STRING)

result = db[TABLE_NAME].all()
freeze(result, format='csv', filename=CSV_NAME)
Example #20
def export(context, params):
    table = context.datastore[params["table"]]
    datafreeze.freeze(table, format='json', filename=params['filename'])
Example #21
    'name': 'Marie Curie',
    'nationality': 'Polish',
    'sex': 'female',
    'year': 1911
}]

import dataset
db = dataset.connect('sqlite:///data/nobel_prize.db')

wtable = db['winners']
winners = wtable.find()
winners = list(winners)
print(winners)

wtable = db['winners']
wtable.drop()
wtable = db['winners']
wtable.find()

with db as tx:
    for w in nobel_winners:
        tx['winners'].insert(w)

print(list(db['winners'].find()))

from datafreeze import freeze

winners = db['winners'].find()
freeze(winners, format='csv', filename='data/nobel_winners_ds.csv')
open('data/nobel_winners_ds.csv').read()
Example #22
    mkt.person_id,
    dd.FullDateUSA,
    mkt.min_reg_id,
    mkt.total_platelet_donations
from
(
    select MKT.personid PERSON_ID, MIN(mkt.COLLECTIONDATESK) MIN_REG_DATE,MIN(registrationid) MIN_REG_ID,COUNT(1) total_platelet_donations
    from [Integration].[dbo].[INT_MKTCollectionDetails] mkt
    where mkt.DonationTypeSK in (2,5,7,26)		
     and mkt.CompletedFlag >= 8		
     and upper(mkt.GENDER) in ('M','F')		
     and (MKT.collectiondatesk >= '20180701' and MKT.collectiondatesk < '20180801')
    group by MKT.personid
) mkt
JOIN [Integration].[dbo].[VW_INT_DIMDATE] dd
    ON dd.dateKey = mkt.MIN_REG_DATE
""")

# for row in mktJul2018Donors:
#   print(row['PERSON_ID'], row['total_platelet_donations'])

# Output to CSV:
#datafreeze.freeze(mktJul2018Donors, format='csv',
#                  filename='data/mktCollect_Jul18_platelet_dons.csv')

# Output to JSON:
datafreeze.freeze(mktJul2018Donors,
                  format='json',
                  wrap=False,
                  prefix='../',
                  filename='data/mktCollect_Jul18_plDons.json')
Example #23
db = dataset.connect(
    'mssql+pyodbc://ORLEBIDEVDB/INTEGRATION?driver=SQL+Server+Native+Client+11.0'
)

# Querying:

mktJul2018Donors = db.query("""
select
    mkt.person_id,
    dd.FullDateUSA,
    mkt.min_reg_id,
    mkt.total_platelet_donations
from
(
    select MKT.personid PERSON_ID, MIN(mkt.COLLECTIONDATESK) MIN_REG_DATE,MIN(registrationid) MIN_REG_ID,COUNT(1) total_platelet_donations
    from [Integration].[dbo].[INT_MKTCollectionDetails] mkt
    where mkt.DonationTypeSK in (2,5,7,26)		
     and mkt.CompletedFlag >= 8		
     and upper(mkt.GENDER) in ('M','F')		
     and (MKT.collectiondatesk >= '20180701' and MKT.collectiondatesk < '20180801')
    --and reg.person_id = 2237761
    group by MKT.personid
) mkt
JOIN [Integration].[dbo].[VW_INT_DIMDATE] dd
    ON dd.dateKey = mkt.MIN_REG_DATE
""")

# Output to JSON:
datafreeze.freeze(mktJul2018Donors,
                  format='csv',
                  filename='mktCollect_Jul18_platelet_dons.csv')
Example #24
import tweepy
import dataset
from datafreeze import freeze
import json

db = dataset.connect("sqlite:///tweets.db")

result = db["tweets"].all()
# We’re now able to write each of our processed tweets to a database.
# Once they’re in a database, they can be easily queried, or dumped out to csv for further analysis.
freeze(result, format='csv', filename="all-tweets")
Example #25
import settings
#import tweepy
import dataset
#from textblob import TextBlob
from datafreeze import freeze

db = dataset.connect(settings.CONNECTION_STRING)

result = db[settings.TABLE_NAME].all()
freeze(result, format='csv', filename=settings.CSV_NAME)
Example #26
import settings
import dataset
from datafreeze import freeze

db = dataset.connect(settings.CONNECTION_STRING)

result = db[settings.OUTPUT_TABLE].all()
freeze(result, format='csv', filename=settings.OUTPUT_FILE)
Example #27
import settings
import dataset
from datafreeze import freeze

db = dataset.connect(settings.CONNECTION_STRING)
result = db[settings.TABLE_NAME].all()
freeze(result, format='csv', filename=settings.TWEETS_FILE_CSV)
Example #28
    def cleanup_db():
        print('EXITING.')
        print('FREEZING DB...')
        result = db['tweets'].all()
        freeze(result, format='csv', filename='output/' + db_file + '.csv')
        # os.walk yields bare filenames, so compare the basename against the output dir
        for root, dirs, files in os.walk(os.getcwd()):
            if os.path.basename(root) == 'output' and db_file + '.csv' in files:
                print('DB FROZEN. {}.csv SAVED'.format(db_file))

    
    try:
        # credentials redacted in the original snippet
        access_token = ''
        access_token_secret = ''
        consumer_key = ''
        consumer_secret = ''

        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        api = tweepy.API(auth)

        listener = StreamListener(api)
        stream = tweepy.Stream(auth = api.auth, listener = listener)
        stream.filter(track = to_track)
    except KeyboardInterrupt:
        print('FREEZING DB...')
        result = db['tweets'].all()
        freeze(result, format='csv', filename= 'output/'+db_file+'.csv')
        sys.exit()

    atexit.register(cleanup_db)
Example #29
import settings
import dataset
import datafreeze

db = dataset.connect(settings.scraper_db)
result = db[settings.tweets_table].all()
datafreeze.freeze(result, format='csv', filename=settings.dump_tweets_csv)
Example #30
import settings
import dataset
import datafreeze

db = dataset.connect(settings.clusters_db)
result = db[settings.store_clusters_table].all()
datafreeze.freeze(result, format='csv', filename=settings.dump_clusters_csv)