def freeze_full_database(db_path, prefix, filename='full_dataset_frozen.json'):
    '''
    Create one frozen json file for all data samples in the .db file located at db_path.
    Saves json as a flat list of individual records.

    Parameters
    ----------
    db_path : str
        Absolute path to source db file.
        e.g. /media/data/pyleaves/leavesdb/resources/leavesdb.db
    prefix : str
        Directory in which to save the frozen file.

    Returns
    -------
    frozen_json_filepath : dict({str: str})
    '''
    db_URI = f'sqlite:///{db_path}'
    db = dataset.connect(db_URI)
    table = db['dataset']
    # dataset_rows = list(table.distinct('id'))
    dataset_rows = list(table.all(order_by='id'))
    datafreeze.freeze(dataset_rows,
                      mode='list',
                      format='json',
                      filename=filename,
                      prefix=prefix)
    frozen_json_filepath = {'full_dataset': os.path.join(prefix, filename)}
    return frozen_json_filepath
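# A minimal, hypothetical usage sketch for freeze_full_database. It assumes the
# module-level imports the function relies on (os, dataset, datafreeze) and an
# existing sqlite file with a 'dataset' table; the paths below are assumptions,
# not taken from the source.
import os            # used by freeze_full_database for os.path.join
import dataset       # used for dataset.connect
import datafreeze    # used for datafreeze.freeze

paths = freeze_full_database(db_path='/tmp/leavesdb.db',   # assumed location
                             prefix='/tmp/frozen')         # assumed output dir
print(paths)  # -> {'full_dataset': '/tmp/frozen/full_dataset_frozen.json'}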
def freeze_if_not_empty(items, path):
    # Materialize the iterable once so emptiness can be checked before freezing
    force_eval = list(items)
    if force_eval:
        datafreeze.freeze(force_eval, format="json", filename=path)
    else:
        # Nothing to freeze; write an empty JSON object so the file still exists
        with open(path, "w") as f:
            f.write("{}\n")
def dict2json(data, prefix, filename):
    '''
    Convert a list of dicts into proper json format for constructing a sql db

    Arguments:
        data, list(dict): e.g. [{'id':1,'path':...,'family':...}, {'id':2,...}]
        prefix, str: Directory in which to save file
        filename, str: Name of file in which to save json
    Return:
        json_output, dict: {'count': ..., 'results': data, 'meta': []}
    '''
    data_count = len(data)
    json_output = {"count": data_count, "results": data, "meta": []}
    datafreeze.freeze(data,
                      mode='list',
                      format='json',
                      filename=filename,
                      prefix=prefix)
    return json_output
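# Hypothetical usage sketch for dict2json; the records are made up for
# illustration and 'resources' is an assumed output directory.
records = [{'id': 1, 'path': '/data/a.jpg', 'family': 'Fagaceae'},
           {'id': 2, 'path': '/data/b.jpg', 'family': 'Betulaceae'}]
wrapped = dict2json(records, prefix='resources', filename='records.json')
print(wrapped['count'])  # 2; the same records also land in resources/records.json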
def processFiles(self):
    # loop through each file in the input
    for full_path in self.full_paths:
        file_name = os.path.basename(full_path)
        print('Processing {:}...'.format(full_path))
        self.verbose and analyzer.printContents(full_path)
        try:
            self.processOneFile(full_path)
        except Exception:
            print("WARNING: Failed to process {:}".format(file_name))
        self.drawPlots and plt.show()
    print("Done processing all files.")

    # dump the results to a csv file if the user asked for it
    if self.freezeObj is not None:
        if self.full_paths == []:
            # we were called without processing new files
            if len(self.db.tables) == 1:
                tableName = self.db.tables[0]
            else:
                print('The tables available in the database are:')
                print(self.db.tables)
                tableName = input("Type one here --> ")
            self.t = self.db.create_table(tableName,
                                          primary_id='int_hash',
                                          primary_type=self.db.types.bigint)
        result = self.t.all()
        datafreeze.freeze(result, format='csv', fileobj=self.freezeObj)
        print('Sooo cold! Data frozen to {:}'.format(self.freezeObj.name))
        self.freezeObj.close()
def main():
    # continuously update the tweets.csv file
    while True:
        # dump the database into the csv file
        database = dataset.connect("sqlite:///tweets.db")
        result = database["tweets"].all()
        # tweets.db can not be empty at this point!
        freeze(result, format='csv', filename="tweets.csv")

        # analyze each line in the csv file
        csvReader()
        avg_city_scores = {}
        for key, value in city_scores.items():
            avg_city_scores[key] = sum(value) / float(len(value))
        print("###########################################")
        print(avg_city_scores)
        city_scores.clear()

        # erase the database tweets table
        table = database["tweets"]
        table.delete()
        try:
            # os.delete does not exist; os.remove is the correct call
            os.remove("tweets.csv")
        except OSError:
            pass

        # wait 30 seconds, then redo the above code
        time.sleep(30)
def main(args, arduino):
    db = dataset.connect()
    table = db['logs']
    try:
        while True:
            (sensor, avg_distance) = sonic_averages(args, arduino)
            # Really hacky solution, needs to change: prev_obj is undefined on
            # the first pass, so fall back to the three-argument call when the
            # NameError fires.
            try:
                prev_obj = music(sensor, args, avg_distance, prev_obj)
            except NameError:
                prev_obj = music(sensor, args, avg_distance)
            table.insert(
                dict(time=int(time.time()), sensor=sensor, value=avg_distance))
            datafreeze.freeze(db['logs'].all(), format='csv', filename='logs.csv')
    except KeyboardInterrupt:
        # Closes the port and updates the database if there is a keyboard interrupt
        if args.save:
            datafreeze.freeze(db['logs'].all(), format='csv', filename='logs.csv')
        arduino.close()  # Should close the serial port (hopefully)
def cleanup_db():
    print('EXITING.')
    print('FREEZING DB...')
    result = db['tweets'].all()
    freeze(result, format='csv', filename='output/' + db_file + '.csv')
    for root, dirs, files in os.walk(os.getcwd()):
        # os.walk yields bare file names, so compare without the directory prefix
        if db_file + '.csv' in files:
            print('DB FROZEN. {}.csv SAVED'.format(db_file))
def export_ctf(segments=None):
    db = dataset.connect(get_config('SQLALCHEMY_DATABASE_URI'))
    if segments is None:
        segments = ['challenges', 'teams', 'both', 'metadata']

    groups = {
        'challenges': [
            'challenges',
            'files',
            'tags',
            'keys',
            'hints',
        ],
        'teams': [
            'teams',
            'tracking',
            'awards',
        ],
        'both': [
            'solves',
            'wrong_keys',
            'unlocks',
        ],
        'metadata': [
            'alembic_version',
            'config',
            'pages',
        ]
    }

    # Backup database
    backup = six.BytesIO()
    backup_zip = zipfile.ZipFile(backup, 'w')

    for segment in segments:
        group = groups[segment]
        for item in group:
            result = db[item].all()
            result_file = six.BytesIO()
            datafreeze.freeze(result, format='ctfd', fileobj=result_file)
            result_file.seek(0)
            backup_zip.writestr('db/{}.json'.format(item), result_file.read())

    # Backup uploads
    upload_folder = os.path.join(os.path.normpath(app.root_path),
                                 get_config('UPLOAD_FOLDER'))
    for root, dirs, files in os.walk(upload_folder):
        for file in files:
            parent_dir = os.path.basename(root)
            backup_zip.write(os.path.join(root, file),
                             arcname=os.path.join('uploads', parent_dir, file))

    backup_zip.close()
    backup.seek(0)
    return backup
def dumper():
    # dump the database into the csv file
    database = dataset.connect("sqlite:///tweets.db")
    result = database["tweets"].all()
    # tweets.db can not be empty at this point!
    freeze(result, format='csv', filename="tweets.csv")

    # erase the database tweets table
    table = database["tweets"]
    table.delete()

    # analyze each line in the csv file
    csvReader()
    avg_city_scores = {}
    for key, value in city_scores.items():
        avg_city_scores[key] = sum(value) / float(len(value))
    print("###########################################")
    print(avg_city_scores)

    # map each average score to a 0-4 bucket; 2 means neutral or no data
    scores = ""
    for city in cities:
        if city in avg_city_scores:
            if avg_city_scores[city] < -0.5:
                s = 0
            elif -0.5 < avg_city_scores[city] < 0:
                s = 1
            elif avg_city_scores[city] == 0:
                s = 2
            elif 0 < avg_city_scores[city] < 0.5:
                s = 3
            else:
                s = 4
            scores += str(s)
        else:
            scores += '2'

    print("updated scores.txt")
    with open('scores.txt', 'w') as file:
        file.write(scores)

    city_scores.clear()

    # remove the csv dump; os.delete does not exist, so use os.remove
    try:
        os.remove("tweets.csv")
    except OSError:
        pass
def exportCSV(canidateName):
    print("Export data to csv: " + canidateName + "_" + csvFileName)
    db = dataset.connect(sqlConnectionString)
    result = db[sqlTableName].all()
    newFileName = "data/" + canidateName + "_" + csvFileName + ".csv"
    freeze(result, format='csv', filename=newFileName)
    if canidateName == "B":
        bucket = "bd2020bidentweets"
    else:
        bucket = "bd2020donaldtweets"
    upload_file(newFileName, bucket)
def export_ctf():
    # TODO: For some unknown reason dataset is only able to see alembic_version
    # during tests, even using a real sqlite database. This makes this test
    # impossible to pass in sqlite.
    db = dataset.connect(get_app_config("SQLALCHEMY_DATABASE_URI"))

    # Backup database
    backup = tempfile.NamedTemporaryFile()
    backup_zip = zipfile.ZipFile(backup, "w")

    tables = db.tables
    for table in tables:
        result = db[table].all()
        result_file = six.BytesIO()
        datafreeze.freeze(result, format="kmactf", fileobj=result_file)
        result_file.seek(0)
        backup_zip.writestr("db/{}.json".format(table), result_file.read())

    # Guarantee that alembic_version is saved into the export
    if "alembic_version" not in tables:
        result = {
            "count": 1,
            "results": [{
                "version_num": get_current_revision()
            }],
            "meta": {},
        }
        result_file = six.BytesIO()
        # json.dump writes str, which a BytesIO rejects under Python 3,
        # so serialize and encode explicitly
        result_file.write(json.dumps(result).encode("utf-8"))
        result_file.seek(0)
        backup_zip.writestr("db/alembic_version.json", result_file.read())

    # Backup uploads
    uploader = get_uploader()
    uploader.sync()

    upload_folder = os.path.join(os.path.normpath(app.root_path),
                                 app.config.get("UPLOAD_FOLDER"))
    for root, dirs, files in os.walk(upload_folder):
        for file in files:
            parent_dir = os.path.basename(root)
            backup_zip.write(
                os.path.join(root, file),
                arcname=os.path.join("uploads", parent_dir, file),
            )

    backup_zip.close()
    backup.seek(0)
    return backup
def freeze_db_by_dataset(db_path='resources/leavesdb.db',
                         prefix='resources',
                         freeze_key='dataset'):
    '''
    Create frozen json files for each dataset in the .db file located at db_path.
    Saves json as a flat list of individual records.

    Parameters
    ----------
    db_path : str
        Absolute path to source db file.
        e.g. /media/data/pyleaves/leavesdb/resources/leavesdb.db
    prefix : str
        Directory in which to save the frozen files.
    freeze_key : str, optional
        Column whose distinct values define the datasets to freeze.
        The default is 'dataset'.

    Returns
    -------
    frozen_json_filepaths : dict({str:str})
    '''
    db_URI = f'sqlite:///{db_path}'
    db = dataset.connect(db_URI)
    table = db['dataset']
    dataset_names = table.distinct(freeze_key)

    frozen_json_filepaths = {}
    for dataset_name in dataset_names:
        dataset_name = dataset_name[freeze_key]
        dataset_rows = table.find(dataset=dataset_name)
        filename = f'{dataset_name}_frozen.json'
        datafreeze.freeze(dataset_rows,
                          mode='list',
                          format='json',
                          filename=filename,
                          prefix=prefix)
        frozen_json_filepaths.update(
            {dataset_name: os.path.join(prefix, filename)})
    return frozen_json_filepaths
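# Hypothetical usage sketch for freeze_db_by_dataset, using its defaults; the
# returned dict maps each distinct dataset name to its frozen json path. The
# example dataset name in the comment is made up.
frozen = freeze_db_by_dataset(db_path='resources/leavesdb.db',
                              prefix='resources',
                              freeze_key='dataset')
for name, path in frozen.items():
    print(name, '->', path)  # e.g. Fossil -> resources/Fossil_frozen.json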
def freeze_data():
    print("Freezing dimension values...")
    prefix = os.path.join(get_output_dir(), 'data', 'dimensions')
    freeze(value_table.all(), prefix=prefix,
           filename='{{dimension_name}}.csv', format='csv')
    freeze(value_table.all(), prefix=prefix,
           filename='{{dimension_name}}.json', format='json')

    print("Freezing cubes...")
    for cube in get_cubes():
        prefix = os.path.join(get_output_dir(), 'data',
                              cube['statistic_name'], cube['cube_name'])
        slug = slugify(cube['statistic_title_de'])
        for (text, rb) in [('labeled', True), ('raw', False)]:
            q, ps = query_cube(cube['cube_name'], readable=rb)
            fn = '%s-%s-%s.csv' % (slug, cube['cube_name'], text)
            print([fn])
            freeze(engine.query(q), prefix=prefix, filename=fn)
def export(context, params):
    table = context.datastore[params["table"]]
    datafreeze.freeze(table, format="json", filename=params["filename"])
import settings
import tweepy
import dataset
from datafreeze import freeze
from textblob import TextBlob

db = dataset.connect(settings.CONNECTION_STRING)
result = db[settings.TABLE_NAME].all()
freeze(result, format='csv', filename='test.csv')
import dataset
from datafreeze import freeze

db = dataset.connect("sqlite:///tweets.db")
result = db['myTable'].all()
freeze(result, format='csv', filename='tweets.csv')
print(filename)
with open(filename, "r") as fh:
    contents = fh.read()
soup = BeautifulSoup(contents, "html.parser")  # explicit parser avoids bs4's warning

wiponumber = soup.find_all('record')
for c in wiponumber:
    properties['wipenumber'] = c.attrs[u'pn']

mainclassification = soup.find_all('ipcs')
for c in mainclassification:
    properties['mainclass'] = c.attrs[u'mc']

subclasses = soup.find_all('ipc')
subcls = []
for c in subclasses:
    subcls.append(c.attrs[u'ic'])
properties['subclasses'] = '"' + "--//--".join(subcls) + '"'

properties['title'] = '"' + soup.find('ti').get_text().replace(
    '\n', "").replace('"', "").replace("'", "") + '"'
properties['abstract'] = '"' + soup.find('ab').get_text().replace(
    '\n', "").replace('"', "").replace("'", "") + '"'
properties['claims'] = '"' + soup.find('cl').get_text().replace(
    '\n', "").replace('"', "").replace("'", "") + '"'
properties['description'] = '"' + soup.find('txt').get_text(
).replace('\n', "").replace('"', "").replace("'", "") + '"'

# print(properties)
# print(table)
table.insert(properties)
freeze(table, format='csv', filename='WIPO-alpha-train.csv')
import logging

import dataset
import datafreeze

from config import CONNECTION_STRING, TABLE_NAME, CSV_NAME

logger = logging.getLogger()

# Export the contents of a SQLite table to CSV:
try:
    db = dataset.connect(CONNECTION_STRING)
    result = db[TABLE_NAME].all()
    datafreeze.freeze(result, format='csv', filename=CSV_NAME)
    logger.info("All data successfully exported to CSV")
except Exception as err:
    logger.error(err)
import tweepy
import dataset
from datafreeze import freeze
from textblob import TextBlob

CONNECTION_STRING = "sqlite:///tweets.db"
CSV_NAME = "data/tweets.csv"
TABLE_NAME = "tweets"

db = dataset.connect(CONNECTION_STRING)
result = db[TABLE_NAME].all()
freeze(result, format='csv', filename=CSV_NAME)
def export(context, params):
    table = context.datastore[params["table"]]
    datafreeze.freeze(table, format='json', filename=params['filename'])
# nobel_winners as given in the snippet (any earlier entries are elided in the source)
nobel_winners = [{
    'name': 'Marie Curie',
    'nationality': 'Polish',
    'sex': 'female',
    'year': 1911
}]

import dataset

db = dataset.connect('sqlite:///data/nobel_prize.db')
wtable = db['winners']
winners = wtable.find()
winners = list(winners)
print(winners)

wtable = db['winners']
wtable.drop()
wtable = db['winners']
wtable.find()

with db as tx:
    for w in nobel_winners:
        tx['winners'].insert(w)

print(list(db['winners'].find()))

from datafreeze import freeze

winners = db['winners'].find()
freeze(winners, format='csv', filename='data/nobel_winners_ds.csv')
open('data/nobel_winners_ds.csv').read()
mktJul2018Donors = db.query("""
select
    mkt.person_id,
    dd.FullDateUSA,
    mkt.min_reg_id,
    mkt.total_platelet_donations
from (
    select MKT.personid PERSON_ID,
           MIN(mkt.COLLECTIONDATESK) MIN_REG_DATE,
           MIN(registrationid) MIN_REG_ID,
           COUNT(1) total_platelet_donations
    from [Integration].[dbo].[INT_MKTCollectionDetails] mkt
    where mkt.DonationTypeSK in (2,5,7,26)
      and mkt.CompletedFlag >= 8
      and upper(mkt.GENDER) in ('M','F')
      and (MKT.collectiondatesk >= '20180701' and MKT.collectiondatesk < '20180801')
    group by MKT.personid
) mkt
JOIN [Integration].[dbo].[VW_INT_DIMDATE] dd ON dd.dateKey = mkt.MIN_REG_DATE
""")

# for row in mktJul2018Donors:
#     print(row['PERSON_ID'], row['total_platelet_donations'])

# Output to CSV:
# datafreeze.freeze(mktJul2018Donors, format='csv',
#                   filename='data/mktCollect_Jul18_platelet_dons.csv')

# Output to JSON:
datafreeze.freeze(mktJul2018Donors, format='json', wrap=False,
                  prefix='../', filename='data/mktCollect_Jul18_plDons.json')
db = dataset.connect(
    'mssql+pyodbc://ORLEBIDEVDB/INTEGRATION?driver=SQL+Server+Native+Client+11.0'
)

# Querying:
mktJul2018Donors = db.query("""
select
    mkt.person_id,
    dd.FullDateUSA,
    mkt.min_reg_id,
    mkt.total_platelet_donations
from (
    select MKT.personid PERSON_ID,
           MIN(mkt.COLLECTIONDATESK) MIN_REG_DATE,
           MIN(registrationid) MIN_REG_ID,
           COUNT(1) total_platelet_donations
    from [Integration].[dbo].[INT_MKTCollectionDetails] mkt
    where mkt.DonationTypeSK in (2,5,7,26)
      and mkt.CompletedFlag >= 8
      and upper(mkt.GENDER) in ('M','F')
      and (MKT.collectiondatesk >= '20180701' and MKT.collectiondatesk < '20180801')
      --and reg.person_id = 2237761
    group by MKT.personid
) mkt
JOIN [Integration].[dbo].[VW_INT_DIMDATE] dd ON dd.dateKey = mkt.MIN_REG_DATE
""")

# Output to CSV:
datafreeze.freeze(mktJul2018Donors, format='csv',
                  filename='mktCollect_Jul18_platelet_dons.csv')
import tweepy
import dataset
from datafreeze import freeze
import json

db = dataset.connect("sqlite:///tweets.db")
result = db["tweets"].all()

# We're now able to write each of our processed tweets to a database.
# Once they're in a database, they can be easily queried, or dumped out
# to csv for further analysis.
freeze(result, format='csv', filename="all-tweets")
import settings
# import tweepy
import dataset
# from textblob import TextBlob
from datafreeze import freeze

db = dataset.connect(settings.CONNECTION_STRING)
result = db[settings.TABLE_NAME].all()
freeze(result, format='csv', filename=settings.CSV_NAME)
import settings
import dataset
from datafreeze import freeze

db = dataset.connect(settings.CONNECTION_STRING)
result = db[settings.OUTPUT_TABLE].all()
freeze(result, format='csv', filename=settings.OUTPUT_FILE)
import settings
import dataset
from datafreeze import freeze

db = dataset.connect(settings.CONNECTION_STRING)
result = db[settings.TABLE_NAME].all()
freeze(result, format='csv', filename=settings.TWEETS_FILE_CSV)
def cleanup_db(): print('EXITING.') print('FREEZING DB...') result = db['tweets'].all() freeze(result, format='csv', filename= 'output/'+db_file+'.csv') for root, dirs, files in os.walk(os.getcwd()): if 'output/'+db_file+'.csv' in files: print('DB FROZEN. {}.csv SAVED'.format()) try: access_token = access_token_secret = consumer_key = consumer_secret = auth = tweepy.OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_token, access_token_secret) api = tweepy.API(auth) listener = StreamListener(api) stream = tweepy.Stream(auth = api.auth, listener = listener) stream.filter(track = to_track) except KeyboardInterrupt: print('FREEZING DB...') result = db['tweets'].all() freeze(result, format='csv', filename= 'output/'+db_file+'.csv') sys.exit() atexit.register(cleanup)
import settings
import dataset
import datafreeze

db = dataset.connect(settings.scraper_db)
result = db[settings.tweets_table].all()
datafreeze.freeze(result, format='csv', filename=settings.dump_tweets_csv)
import settings
import dataset
import datafreeze

db = dataset.connect(settings.clusters_db)
result = db[settings.store_clusters_table].all()
datafreeze.freeze(result, format='csv', filename=settings.dump_clusters_csv)