def summarize_agreement():
    """ Summarize agreement levels """
    # One output row per analysis country, keyed by country name.
    countries = OrderedDict()
    for country in get_country_list():
        if country in ANALYSIS_COUNTRIES:
            countries[country] = OrderedDict((('country', country), ))
    # Dispatch each question to the processor matching its type; every
    # processor mutates `countries` in place.
    processors = {
        'mentioned': process_mentioned,
        'agree_3way': process_agree_3way,
        'agree_4way': process_agree_4way,
        'likert': process_likert,
    }
    for question_id in ANALYSIS_QUESTIONS:
        question, result = query(question_id)
        handler = processors.get(question['question_type'])
        if handler is not None:
            handler(question, result, countries)
    dataset.freeze(countries.values(), format='csv',
                   filename='output/agreement_summary.csv')
def summarize_2010():
    """Summarize 2010 decennial census population by parish (county).

    Writes per-county totals and race/ethnicity counts plus percentage
    shares to output/population-summary/decennial-population-2010.csv.
    """
    # Columns d001/d003/d004/d006/d010 are the decennial table's total,
    # white, black, asian and hispanic counts respectively.
    result = db.query("""
        select f.county_name,
        sum(c.d001::integer) as total,
        sum(c.d003::integer) as white,
        (sum(c.d003::float)/sum(c.d001::float)) as white_percent,
        sum(c.d004::integer) as black,
        (sum(c.d004::float)/sum(c.d001::float)) as black_percent,
        sum(c.d006::integer) as asian,
        (sum(c.d006::float)/sum(c.d001::float)) as asian_percent,
        sum(c.d010::integer) as hispanic,
        (sum(c.d010::float)/sum(c.d001::float)) as hispanic_percent
        from census_data c
        join census_geography_2010 g on c.geo_id = g.geo_id
        join fips f on g.county = f.county_fp
        where c.product='decennial-2010' and f.county_name in ({0})
        group by f.county_name
    """.format(get_parishes()))
    dataset.freeze(result, format='csv',
                   filename='output/population-summary/decennial-population-2010.csv')
def summarize():
    """Write per-region sums, per-capita figures and percent changes (1965 vs 2015)."""
    raw_1965 = agate.Table.from_csv('processed-data/1965.csv', COLUMNS)
    raw_2015 = agate.Table.from_csv('processed-data/2015.csv', COLUMNS)
    for region, states, population in REGIONS:
        # Restrict both years to this region's states.
        t65 = raw_1965.where(lambda row: row['state'] in states)
        t15 = raw_2015.where(lambda row: row['state'] in states)
        rows = []
        # First two COLUMNS entries are identifiers; summarize the rest.
        for col_name, col_type in COLUMNS[2:]:
            sum65 = t65.columns[col_name].aggregate(agate.Sum())
            sum15 = t15.columns[col_name].aggregate(agate.Sum())
            per65 = sum65 / population['1965']
            per15 = sum15 / population['2014']
            rec = OrderedDict()
            rec['var'] = col_name
            rec['1965'] = sum65
            rec['1965_per_capita'] = per65
            rec['2015'] = sum15
            rec['2015_per_capita'] = per15
            rec['absolute_percent_change'] = (sum15 - sum65) / sum65
            rec['per_capita_percent_change'] = (per15 - per65) / per65
            rows.append(rec)
        dataset.freeze(rows, format='csv',
                       filename='processed-data/{0}-sums.csv'.format(region))
def summarize_agreement():
    """ Summarize agreement levels """
    # Build one output row per analysis country, keyed by country name.
    country_list = get_country_list()
    countries = OrderedDict()
    for country in country_list:
        if country in ANALYSIS_COUNTRIES:
            countries[country] = OrderedDict((('country', country),))
    # Each question type has its own processor; every processor mutates
    # `countries` in place, adding per-question summary columns.
    for question_id in ANALYSIS_QUESTIONS:
        question, result = query(question_id)
        if question['question_type'] == 'mentioned':
            process_mentioned(question, result, countries)
        if question['question_type'] == 'agree_3way':
            process_agree_3way(question, result, countries)
        if question['question_type'] == 'agree_4way':
            process_agree_4way(question, result, countries)
        if question['question_type'] == 'likert':
            process_likert(question, result, countries)
    # Freeze the accumulated per-country rows to a single CSV summary.
    dataset.freeze(countries.values(), format='csv',
                   filename='output/agreement_summary.csv')
def save_data(tablename: str,
              results: List[Dict[str, Any]],
              taskname: str,
              directory: str,
              timestamp: Union[arrow.Arrow, datetime] = None,
              output_format: str = "csv") -> None:
    """
    Saves a dataset result set to a suitable output file.

    output_format can be one of: csv, json, tabson
    (see https://dataset.readthedocs.org/en/latest/api.html#dataset.freeze)
    """
    if timestamp is None:
        timestamp = datetime.utcnow()
    filename = "{taskname}_{datetime}_{tablename}.{output_format}".format(
        taskname=taskname,
        tablename=tablename,
        datetime=timestamp.strftime(FILENAME_SAFE_ISOFORMAT),
        output_format=output_format
    )
    # BUG FIX: the log format string never interpolated the filename it was
    # given (the kwarg was silently ignored); include it in the message.
    log.info("Saving {tablename} data to {filename}".format(
        tablename=tablename, filename=filename))
    dataset.freeze(results, format=output_format, filename=filename,
                   prefix=directory)
    # dataset.freeze() writes nothing for an empty result set, so a missing
    # file after the call most likely means there were no rows.
    if not os.path.isfile(os.path.join(directory, filename)):
        log.error(
            "save_data: file {} not created; empty results?".format(filename))
def summarize_acs(year):
    """Summarize ACS population estimates by parish for the given year.

    Writes per-county totals and race/ethnicity counts plus percentage
    shares to output/population-summary/acs-population-<year>.csv.
    """
    # hd01_vd01/03/04/06/12 are the ACS table's total, white, black,
    # asian and hispanic estimate columns respectively.
    result = db.query("""
        select f.county_name,
        sum(c.hd01_vd01::integer) as total,
        sum(c.hd01_vd03::integer) as white,
        (sum(c.hd01_vd03::float)/sum(c.hd01_vd01::float)) as white_percent,
        sum(c.hd01_vd04::integer) as black,
        (sum(c.hd01_vd04::float)/sum(c.hd01_vd01::float)) as black_percent,
        sum(c.hd01_vd06::integer) as asian,
        (sum(c.hd01_vd06::float)/sum(c.hd01_vd01::float)) as asian_percent,
        sum(c.hd01_vd12::integer) as hispanic,
        (sum(c.hd01_vd12::float)/sum(c.hd01_vd01::float)) as hispanic_percent
        from census_data c
        join census_geography_2010 g on c.geo_id = g.geo_id
        join fips f on g.county = f.county_fp
        where c.product='acs-{0}' and f.county_name in ({1})
        group by f.county_name
    """.format(year, get_parishes()))
    dataset.freeze(result, format='csv',
                   filename='output/population-summary/acs-population-{0}.csv'.format(year))
def lod_dump(ctx, dataset_args, step, stage, reset, full_data, data, archive=True):
    """Dump stamped data items into the step table in chunks, then freeze to JSON.

    :param ctx: run context; ctx.gbc is used for logging
    :param dataset_args: dataset configuration (DS_CHUNKSIZE, DS_TYPE, ...)
    :param step: step name, used for the table and the freeze filename
    :param stage: stage name
    :param reset: whether the step table should be reset
    :param full_data: forwarded to the stamping generator
    :param data: iterable of items to dump
    :param archive: when True, zip the frozen JSON file (and delete the raw
        file unless DS_TYPE is 'freeze_json')
    :return: dict with 'total', 'count_in_table' and 'totals_equal'
    """
    lod_db = _get_db(ctx, stage, dataset_args)
    step_table = _get_step_table_in_stage(
        ctx, step, stage, reset, dataset_args)
    counter = Counter()
    counter.set_parent(ctx.gbc)
    stamped_data = make_stamping_gen(ctx, data, full_data, counter)
    rows = []
    # Chunk size defaults to 10 rows when not configured.
    chunk_size = dataset_args.get('DS_CHUNKSIZE', 10)
    ctx.gbc.say('dumping chunksize: %d' % chunk_size, verbosity=40)
    start_ts = arrow.now()
    for ditem in stamped_data:
        # Each item is stored as a single JSON-string column.
        rows.append({u'json_str': json.dumps(ditem)})
        if len(rows) == chunk_size:
            # Insert each full chunk inside its own transaction.
            lod_db.begin()
            step_table.insert_many(rows, chunk_size=chunk_size)
            lod_db.commit()
            rows = []
            curr_time = arrow.now() - start_ts
            ctx.gbc.say('dumping chunk took: ' + str(curr_time), verbosity=40)
    # take care of the rest
    if len(rows) > 0:
        step_table.insert_many(rows)
    ret = {}
    ret['total'] = counter.get_total()
    ret['count_in_table'] = step_table.count()
    # BUG FIX: the original compared ret['count_in_table'] with itself, so
    # totals_equal was unconditionally True. Compare against the counted
    # total so the consistency check actually checks something.
    ret['totals_equal'] = ret['count_in_table'] == ret['total']
    ctx.gbc.say('lod_dump:return', stuff=ret, verbosity=100)
    freeze_file = './remember/dataset_freeze_' + step + '_' + stage + '.json'
    dataset.freeze(step_table, format='json', filename=freeze_file)
    if archive:
        zipped = zipit(ctx, freeze_file)
        # Keep the raw JSON only when explicitly requested via DS_TYPE.
        if zipped and dataset_args['DS_TYPE'] != 'freeze_json':
            os.remove(freeze_file)
    total_time = arrow.now() - start_ts
    ctx.gbc.say('dumping took:' + str(total_time), verbosity=10)
    return ret
def parse_transcript(filename):
    """Parse one Bundestag transcript file into the DB and export it as CSV.

    Derives the parliamentary term (wahlperiode) and sitting (sitzung)
    from the filename, replaces any previously stored rows for that
    sitting, then freezes the ordered contributions to <name>.csv.
    """
    wp, session = file_metadata(filename)
    with open(filename, 'rb') as fh:
        text = clean_text(fh.read())
    # Re-importing a transcript replaces its earlier rows entirely.
    table.delete(wahlperiode=wp, sitzung=session)
    base_data = {
        'filename': filename,
        'sitzung': session,
        'wahlperiode': wp
    }
    print "Loading transcript: %s/%.3d, from %s" % (wp, session, filename)
    seq = 0
    parser = SpeechParser(text.split('\n'))
    for contrib in parser:
        # Enrich each contribution with provenance and speaker metadata.
        contrib.update(base_data)
        contrib['sequence'] = seq
        contrib['speaker_cleaned'] = clean_name(contrib['speaker'])
        contrib['speaker_fp'] = fingerprint(contrib['speaker_cleaned'])
        contrib['speaker_party'] = search_party_names(contrib['speaker'])
        seq += 1
        table.insert(contrib)
    # Export this sitting's contributions, in speech order, to CSV.
    q = '''SELECT * FROM data WHERE wahlperiode = :w AND sitzung = :s
        ORDER BY sequence ASC'''
    fcsv = os.path.basename(filename).replace('.txt', '.csv')
    rp = eng.query(q, w=wp, s=session)
    dataset.freeze(rp, filename=fcsv, prefix=OUT_DIR, format='csv')
def get_raw_effort_and_analytics():
    """
    Get efforts and analytics
    """
    db = dataset.connect(app_config.POSTGRES_URL)
    # Join Seamus stories to per-story Google Analytics aggregates and flag
    # stories the visuals team contributed to (present in the spreadsheet).
    # Stories with zero pageviews/sessions are excluded.
    results = db.query("""
        select s.story_id, s.canonical_url, ga.pageviews, ga.sessions,
        (case when sp.seamus_id is not null then true else false end) as visuals_contributed
        from seamus s
        left join (select story_id, sum(pageviews) as pageviews,
            sum(sessions) as sessions
            from google_analytics group by story_id) ga
        on s.story_id = ga.story_id
        full outer join spreadsheet sp on s.story_id = sp.seamus_id
        where ga.pageviews > 0 and ga.sessions > 0
    """)
    dataset.freeze(results, format='csv',
                   filename='www/live-data/raw_effort_and_analytics.csv')
def store_layer_to_db(data, layer, features): """Load a layer of features into a database table.""" # table names are generated from the name of the layer and # the name of the country. tbl_name = '%s %s' % (data['name'], layer['name']) tbl_name = slugify(tbl_name, sep='_') log.info(' -> %s: %s rows', tbl_name, len(features)) tbl = database[tbl_name] # clear out all existing data. tbl.delete() rows = [] types = {} for feature in features: row = convrow(feature['attributes']) for k, v in row.items(): if isinstance(v, (int, long)): types[k] = BigInteger row['layer_name'] = layer['name'] row['layer_id'] = layer['id'] row['source_name'] = data['name'] row['source_title'] = data['title'] row['source_url'] = data['url'] if QUERY['returnGeometry'] == 'true': # store the geometry as JSON. not sure this is a # great idea because it may make the resulting # CSV files really hard to parse. row['_geometry'] = json.dumps(feature['geometry']) row['_attributes'] = json.dumps(feature['attributes']) rows.append(row) tbl.insert_many(rows, types=types) # Dump the table to a CSV file csv_file = '%s.csv' % tbl_name log.info(' -> %s', csv_file) dataset.freeze(tbl, prefix=DATA_PATH, filename=csv_file, format='csv')
def make_plot(stock):
    """Plot weekly StockTwits message volume against weekly trading volume.

    Regresses trading volume on message volume, renders a bokeh scatter
    with the fitted line, and writes the chart to index.html.
    """
    #get message volume
    #connect aws rds frankfurt
    db = dataset.connect('mysql://' + config.user + ":" + config.pw + "@" +
                         config.hostfrank + '/' + config.database)

    def getWeeklyVolume(symbol):
        # SECURITY NOTE(review): symbol is spliced into the SQL by string
        # concatenation — acceptable for trusted tickers, but injectable if
        # a caller ever passes untrusted input; parameterize if in doubt.
        return db.query(
            "select week(time, 3) as week, year(time) as year, count(*) as message_count "
            "from messages_w_sentiment_v2 "
            "where date(time) between 20140101 and 20161231 "
            "and symbol = " + "'" + symbol + "' "
            "group by week,year "
            "order by year,week ")

    result = getWeeklyVolume(stock)
    # Round-trip through CSV so the result lands in a pandas DataFrame.
    dataset.freeze(result, format='csv', filename=stock + 'messageVolume.csv')
    mvdf = pd.read_csv(stock + 'messageVolume.csv')
    #get volume
    ydf = yahooDownloader.download(stock, 1, 1, 2014, 31, 12, 2016)
    vdf = ydf[['Volume']].copy()
    vdf['date'] = ydf.index
    # Group daily trading volume by "<year> - <ISO week>" keys.
    kw = lambda x: x.isocalendar()[1]
    kw_year = lambda x: str(x.year) + ' - ' + str(x.isocalendar()[1])
    grouped = vdf.groupby([vdf['date'].map(kw_year)], sort=False).agg({'Volume': 'sum'})
    # Drop the last (presumably partial) week so both series align —
    # TODO confirm this is the intended alignment.
    mvdf = mvdf[:-1]
    #prepare plot
    x = mvdf['message_count']
    y = grouped['Volume']
    #regress
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    # Sample the fitted line across the expected message-volume range.
    r_x, r_y = zip(*((i, i * slope + intercept) for i in range(0, 5000000, 100000)))
    #plot
    p = figure(plot_width=400, plot_height=400)
    output_file("index.html")
    p.line(r_x, r_y, color="red")
    p.scatter(x, y, marker="square", color="blue")
    p.left[0].formatter.use_scientific = False
    p.xaxis.axis_label = 'Stocktwits Message Volume'
    p.yaxis.axis_label = 'Trading Volume'
    p.title.text = stock
    p.title.align = "center"
    p.title.text_font_size = "25px"
    pvalue_subtitle = "p-value: " + str(p_value)
    p.add_layout(Title(text=pvalue_subtitle, align="center"), "below")
    p.x_range = Range1d(0, x.max())
    p.y_range = Range1d(0, y.max())
    show(p)
def summarize_question(question_id):
    """ Summarize responses for a given question ID """
    question, result = query(question_id)
    print '{0}: {1}'.format(question_id, question['label'])
    # counts: country -> OrderedDict(response label -> tally)
    counts = OrderedDict()
    for row in result:
        if not row['country'] in counts.keys():
            counts[row['country']] = initialize_counts(question_id)
        counts[row["country"]][row["response"]] += 1
    output = []
    for country, values in counts.items():
        output_row = OrderedDict((('country', country),))
        # First pass: total responses for the country (the denominator).
        total = 0
        for label, value in values.items():
            total += int(value)
        output_row['total_responses'] = total
        # Second pass: raw count plus a percentage column per response label.
        for label, value in values.items():
            output_row[label] = value
            # Strip non-ASCII characters so the pct column header is safe.
            pct_label = '{0} pct'.format(label.encode('ascii', 'ignore').decode('ascii'))
            output_row[pct_label] = float(value) / total
        output.append(output_row)
    dataset.freeze(output, format='csv',
                   filename='output/{0}.csv'.format(question_id))
def export_alarms_json(self):
    """
    Exports all the alarm data into a JSON string.
    The dataset.freeze() method exports the table into a file object, but
    it is going to be "tricked" into getting an string object to send back.
    Because it also closes the object file we need to overwrite the close
    function to retrieve the data and, then restore it back to normal.
    :return: String containing all the alarm data
    """
    def fake_close():
        # No-op stand-in so dataset.freeze() cannot close our buffer.
        pass
    out_iostr = StringIO.StringIO()
    original_close = out_iostr.close
    alarms_table = self.__connect_alarms()
    # Retrieve the db as a json StringIO without the close method
    out_iostr.close = fake_close
    dataset.freeze(alarms_table.all(), format='json', fileobj=out_iostr)
    out_str = out_iostr.getvalue()
    # Restore the real close and release the buffer now that we have a copy.
    out_iostr.close = original_close
    out_iostr.close()
    # Get only the required data and format it
    alarms_dict = {'alarms': json.loads(out_str)['results']}
    # This commented out line would prettify the string
    #json.dumps(alarms_dict, indent=4, separators=(',', ': '))
    return json.dumps(alarms_dict)
def parse_file(path):
    """Load a scraped-layer JSON file; dump each layer to a DB table and CSV."""
    with open(path, 'rb') as fh:
        ctx = json.load(fh)
    #if ctx['source_name'] not in ['MZ']:
    #    return
    layers = ctx.pop('layers')
    for layer in layers:
        # Per-layer copy of the file context so provenance columns differ.
        lctx = ctx.copy()
        lctx['layer_name'] = layer['name']
        lctx['layer_id'] = layer['id']
        lctx.pop('rest_url', None)
        tbl_name = slugify('%(source_name)s %(layer_name)s' % lctx, sep='_')
        tbl = database[tbl_name]
        # Re-running the import replaces the table's contents.
        tbl.delete()
        features = layer['data']['features']
        print ' -> Generating:', tbl_name
        print ' ', layer['name'], layer['id'], len(features)
        for feature in features:
            attrs = convrow(feature.get('attributes'))
            attrs.update(lctx)
            tbl.insert(attrs)
        dataset.freeze(tbl, prefix=DEST_PATH,
                       filename='%s.csv' % tbl_name, format='csv')
def parse_file(path):
    """Parse one scraped-layer JSON file (NA/MZ sources only) into per-layer CSVs."""
    with open(path, 'rb') as fh:
        ctx = json.load(fh)
    # Only Namibia and Mozambique sources are processed by this variant.
    if ctx['source_name'] not in ['NA', 'MZ']:
        return
    layers = ctx.pop('layers')
    for layer in layers:
        if layer['name'] in IGNORE_LAYERS:
            continue
        # Fresh in-memory SQLite DB per layer, used only as a staging area
        # for dataset.freeze().
        eng = dataset.connect('sqlite://')
        tbl = eng['all']
        lctx = ctx.copy()
        lctx['layer_name'] = layer['name']
        lctx['layer_id'] = layer['id']
        del lctx['rest_url']
        features = layer['data']['features']
        print ctx['source_url'], layer['name'], layer['id'], len(features)
        for feature in features:
            attrs = convrow(feature.get('attributes'))
            attrs.update(lctx)
            tbl.insert(attrs)
        # CSV filename derived from source and layer, then slugified.
        fn = '%(source_name)s-%(layer_id)s %(layer_name)s.csv' % lctx
        fn = slugify(fn)
        dataset.freeze(tbl, filename=fn, format='csv', prefix=DEST_PATH)
def backup_db():
    '''Backup measurement data as timestamped csv-file. Return filename.'''
    # File name carries today's date so successive backups don't collide.
    fname = 'measurement_backup_%s.csv' % date.today().strftime("%Y-%m-%d")
    rows = get_table('measurements').all()
    dataset.freeze(rows, format='csv', filename='app/static/' + fname)
    return fname
def get_data(user):
    """Export a user's rows to export.json and return the file's contents.

    The temporary export file is deleted before returning.
    """
    # Renamed from `set`, which shadowed the builtin.
    rows = table.find(username=user)
    dataset.freeze(rows, format='json', filename="export.json")
    # Hard-coded path kept from the original; the duplicate literal is now
    # a single variable so open() and remove() cannot drift apart.
    export_path = "C:/users/jake/skydrive/projects/jpmc-cfg-cmh-15/export.json"
    with open(export_path, 'r') as exportfile:
        toreturn = exportfile.read()
    os.remove(export_path)
    return toreturn
def makedump():
    """Dump the users table to users.json, then render the main frame page."""
    db = dataset.connect(config['db'])
    result = db['users'].all()
    # BUG FIX: the file handle was opened and never closed; a context
    # manager guarantees the dump is flushed before the response is built.
    with open('users.json', 'wb') as fh:
        dataset.freeze(result, format='json', fileobj=fh)
    user = get_user()
    render = render_template('frame.html', lang=lang, page='main.html', user=user)
    return make_response(render)
def export_ctf(segments=None):
    """Export CTF data (DB tables plus uploaded files) as an in-memory zip.

    :param segments: list of segment names to export; defaults to all four.
    :return: io.BytesIO positioned at 0 containing the zip archive.
    """
    db = dataset.connect(get_config('SQLALCHEMY_DATABASE_URI'))
    if segments is None:
        segments = ['challenges', 'teams', 'both', 'metadata']
    # Maps each exportable segment to the DB tables it covers.
    groups = {
        'challenges': [
            'challenges',
            'files',
            'tags',
            'keys',
            'hints',
        ],
        'teams': [
            'teams',
            'tracking',
            'awards',
        ],
        'both': [
            'solves',
            'wrong_keys',
            'unlocks',
        ],
        'metadata': [
            'alembic_version',
            'config',
            'pages',
            'containers',
        ]
    }
    # Backup database
    backup = io.BytesIO()
    backup_zip = zipfile.ZipFile(backup, 'w')
    for segment in segments:
        group = groups[segment]
        for item in group:
            result = db[item].all()
            # Freeze each table to JSON in memory, then add it to the zip
            # under db/<table>.json.
            result_file = io.BytesIO()
            dataset.freeze(result, format='json', fileobj=result_file)
            result_file.seek(0)
            backup_zip.writestr('db/{}.json'.format(item), result_file.read())
    # Backup uploads
    upload_folder = os.path.join(os.path.normpath(app.root_path),
                                 get_config('UPLOAD_FOLDER'))
    for root, dirs, files in os.walk(upload_folder):
        for file in files:
            # Preserve one directory level under uploads/ in the archive.
            parent_dir = os.path.basename(root)
            backup_zip.write(os.path.join(root, file),
                             arcname=os.path.join('uploads', parent_dir, file))
    backup_zip.close()
    backup.seek(0)
    return backup
def print_records_calls():
    """Freeze the table named in the POSTed form to a timestamped CSV and return it."""
    db = dataset.connect('sqlite:///mydatabase.db')
    print_value = request.form
    recoreds_table = db[print_value['Print'][15:]].all()
    filename = 'app/temp_reports/' + str(print_value['Print'][15:]) + \
        str((int(time.time()))) + '.csv'
    dataset.freeze(recoreds_table, format='csv', filename=filename)
    # BUG FIX: open(filename).read() leaked the file handle; read it inside
    # a context manager.
    with open(filename) as fh:
        resp = make_response(fh.read())
    resp.content_type = "application/csv"
    # BUG FIX: threading.Timer(interval=1, function=clear_temp()) invoked
    # clear_temp() immediately (note the call parentheses) and the timer
    # object was never started. Pass the callable and start the timer so
    # cleanup actually runs one second later.
    threading.Timer(interval=1, function=clear_temp).start()
    return resp
def save_csv(self): """ Save calendar as csv """ print "Save event database as csv files" dataset.freeze(self.tables["by_event"].all(), format='csv', filename="data/by_event.csv") dataset.freeze(self.tables["by_person"].all(), format='csv', filename="data/by_person.csv")
def dumpjson(): day = time.localtime(time.time())[7] hour = time.localtime(time.time())[3] minute = time.localtime(time.time())[4] fulltime = str(day) + '-' + str(hour) + '-' + str(minute) print fulltime timestampFilename = 'tweets' + fulltime + '.json' db = dataset.connect(settings.CONNECTION_STRING) result = db[settings.TABLE_NAME].all() dataset.freeze(result, format='json', filename=timestampFilename)
def returntablecontents():
    """Return the files table as JSON; also freeze a files.json debug copy."""
    db = dataset.connect('sqlite:///serverdb.db')
    # Materialize the rows for the JSON response.
    data = list(db['files'])
    # Frozen JSON copy on disk is kept purely for easy debugging.
    dataset.freeze(db['files'].all(), format='json', filename='files.json')
    return jsonify(data)
def dump_db(directory):
    """
    Dump the database. Directory required.
    """
    db = dataset.connect(app_config.POSTGRES_URL)
    # Make sure the target directory exists before writing any file.
    local('mkdir -p {0}'.format(directory))
    for table_name in db.tables:
        backup_filename = '{0}/{1}.csv'.format(directory, table_name)
        with open(backup_filename, 'w') as f:
            dataset.freeze(db[table_name].all(), format='csv', fileobj=f)
def write_agency_lookup():
    """ Write agency lookup """
    # One row per agency that has clearance-rate data: a lookup table
    # mapping ORI7 code -> agency name, type and state.
    result = db.query("""
        select a.ori7, a.agency, a.agentype, a.state
        from agencies as a
        join clearance_rates as c on a.ori7 = c.ori7
        group by a.ori7, a.agency, a.agentype, a.state
        order by a.ori7
    """)
    dataset.freeze(result, format='csv', filename='output/agency_names.csv')
def main():
    """Run the Telegram bot until interrupted, then export collected transactions."""
    # Create the EventHandler and pass it your bot's token.
    # SECURITY NOTE(review): the bot token is hard-coded in source; it should
    # be loaded from an environment variable or config file and this token
    # revoked, since anyone with repo access can control the bot.
    updater = Updater("215450926:AAELiS1y9NIczfgy19KO48mnlMVrcf4xVCs")
    # Get the dispatcher to register handlers
    dp = updater.dispatcher
    # Add conversation handler with the states GENDER, PHOTO, LOCATION and BIO
    # NOTE(review): the states actually registered are the amount/reason/token
    # flow below, not GENDER/PHOTO/... — comment kept from the upstream example.
    conv_handler = ConversationHandler(
        entry_points=[CommandHandler('iOsum', iOsum, pass_user_data=True)],
        states={
            PROCESSAMOUNT: [
                MessageHandler(Filters.text, process_amount, pass_user_data=True)
            ],
            CONFIRMAMOUNT: [RegexHandler('^(Yes|No)$', confirmerAmount, pass_user_data=True)],
            PROCESSREASON: [
                MessageHandler(Filters.text, process_reason, pass_user_data=True)
            ],
            CONFIRMREASON: [RegexHandler('^(Yes|No)$', confirmerReason, pass_user_data=True)],
            GENERATETOKEN: [MessageHandler(Filters.text, token, pass_user_data=True)]
        },
        fallbacks=[CommandHandler('cancel', cancel)])
    dp.add_handler(conv_handler)
    dp.add_handler(CommandHandler('cancel', cancel))
    #dp.add_handler(CallbackQueryHandler(calci))
    # log all errors
    dp.add_error_handler(error)
    # Start the Bot
    updater.start_polling()
    print('Started Bot')
    # Run the bot until the you presses Ctrl-C or the process receives SIGINT,
    # SIGTERM or SIGABRT. This should be used most of the time, since
    # start_polling() is non-blocking and will stop the bot gracefully.
    updater.idle()
    #Export collected data
    logger.info("Exiting. Saving collected data")
    db = dataset.connect('sqlite:///exportdata/transactions.db')
    table = db['usertransactions']
    dataset.freeze(table, format='json', filename='transactions.json')
    print('Bye')
def get_insights():
    """
    gets insights for summary
    """
    db = dataset.connect(app_config.POSTGRES_URL)
    # Latest facebook-insights row per link (max run_time), joined to the
    # Seamus story's lead-art metadata.
    result = db.query("""
        select f1.created_time, f1.people_reached, f1.shares, f1.likes,
        f1.comments, f1.link_clicks, s.has_lead_art, s.lead_art_provider,
        f1.art_root_url, s.lead_art_root_url, s.slug, s.has_audio
        from facebook f1
        inner join (select link_url, max(run_time) as max_run_time
            from facebook group by link_url) f2
        on f1.link_url = f2.link_url and f1.run_time = f2.max_run_time
        join seamus s on f1.link_url = s.canonical_url
    """)
    result_list = list(result)
    matching_results = []
    matching = 0
    no_lead_art = 0
    non_matching = 0
    for row in result_list:
        if row['art_root_url'] == row['lead_art_root_url']:
            # Facebook art matches the story's lead art: keep and categorize.
            row['provider_category'] = _get_provider_category(row)
            row['provider_type'] = _get_provider_type(row)
            matching_results.append(row)
            matching += 1
        elif row['lead_art_root_url']:
            non_matching += 1
        else:
            no_lead_art += 1
    dataset.freeze(matching_results, format='csv',
                   filename='www/live-data/insights.csv')
    total_rows = matching + non_matching + no_lead_art
    # Single summary row with counts and percentage breakdown of the split.
    insights_art_match = [{
        'matching': matching,
        'matching_pct': 100 * float(matching) / total_rows,
        'no_lead_art': no_lead_art,
        'no_lead_art_pct': 100 * float(no_lead_art) / total_rows,
        'non_matching': non_matching,
        'non_matching_pct': 100 * float(non_matching) / total_rows,
        'total': total_rows,
    }]
    dataset.freeze(insights_art_match, format='csv',
                   filename='www/live-data/insights_art_match.csv')
def run_report():
    """Freeze all member rows to a timestamped CSV and move it to the USB device.

    Shows a Tk message box reporting success or failure; returns "break"
    so the bound Tk event stops propagating.
    """
    global mem_dev
    report_result = members.all()
    report_name = datetime.now().strftime("Report_%m%d%y_%H%M%S.csv")
    dataset.freeze(report_result, format='csv', filename=report_name)
    try:
        shutil.move(report_name, mem_dev)
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; catch Exception so those still propagate.
        tkMessageBox.showwarning("Warning!", "Report not moved to USB.")
    else:
        # BUG FIX: corrected the misspelled user-facing title "Succes".
        tkMessageBox.showinfo("Success", "Report saved!")
    return "break"
def freeze_all():
    """Freeze every dataset: write its metadata JSON plus a CSV of its query."""
    for meta, sql in get_queries():
        # Public and private datasets land in separate subtrees.
        export_root = os.path.join('exports5', private_or_public(meta))
        try:
            meta['export_query'] = sql
            target = os.path.join(export_root, meta[u'name'])
            if not os.path.isdir(target):
                os.makedirs(target)
            with open(os.path.join(target, 'dataset.json'), 'w') as handle:
                json.dump(meta, handle, default=json_default, indent=2)
            dataset.freeze(engine.query(sql), filename='dataset.csv',
                           prefix=target, format='csv')
        except Exception as exc:
            # One failed dataset must not abort the whole export run.
            print(exc)
def freeze_all():
    """Export each dataset query to exports/<name>/: metadata JSON plus facts CSV."""
    out_base = "exports"
    for ds, query in get_queries():
        try:
            # Record the query used for the export alongside the metadata.
            ds["export_query"] = query
            path = os.path.join(out_base, ds["name"])
            if not os.path.isdir(path):
                os.makedirs(path)
            ds_path = os.path.join(path, "dataset.json")
            with open(ds_path, "wb") as fh:
                json.dump(ds, fh, default=json_default, indent=2)
            res = engine.query(query)
            dataset.freeze(res, filename="facts.csv", prefix=path, format="csv")
        # Python 2 except syntax: a failed dataset is logged and skipped so
        # one bad export does not abort the whole run.
        except Exception, e:
            print e
def export_from_db(database):
    """Freeze the sessions and agenda tables to both JSON and CSV files."""
    for table_name in ('sessions', 'agenda'):
        for fmt in ('json', 'csv'):
            # Re-fetch per format: the result iterator is consumed by freeze.
            rows = database[table_name].all()
            dataset.freeze(rows, format=fmt,
                           filename='da-{0}.{1}'.format(table_name, fmt))
def get_seamus_verticals():
    """
    gets all seamus content by vertical and writes to csv
    """
    db = dataset.connect(app_config.POSTGRES_URL)
    # Per-slug story counts, split by whether the story carries audio.
    result = db.query("""
        select distinct(s.slug), count(s.story_id) as slug_count,
        count(case when s.has_audio then 1 else null END) as count_has_audio,
        count(case when s.has_audio then null else 1 END) as count_has_no_audio
        from seamus s
        group by s.slug
        order by slug_count desc
    """)
    result_list = list(result)
    # Publication date range, used to normalize counts to stories/day.
    max_result = list(
        db.query("""
            select max(publication_date) from seamus
        """)).pop(0)
    min_result = list(
        db.query("""
            select min(publication_date) from seamus s
        """)).pop(0)
    difference = max_result['max'] - min_result['min']
    total_sum = list(
        db.query("""
            select count(story_id) from seamus s
        """)).pop(0)
    for row in result_list:
        # NOTE(review): assumes the publication range spans at least one
        # full day; difference.days == 0 would raise ZeroDivisionError.
        avg = row['slug_count'] / float(difference.days)
        row['stories_per_day'] = avg
        percent_of_total = row['slug_count'] / float(total_sum['count']) * 100
        row['percent_of_total'] = percent_of_total
    dataset.freeze(result_list, format='csv',
                   filename='www/live-data/seamus_summary.csv')
def push(force): serverurl = "http://139.59.90.147:5000/v1/sendfilelist" """ Sync your local directory with server will always overwrite server""" # 1. check local head and see if there are any changes # 2. if yes, update filetable # 3 Transmit updated filetable # 4 use rsync to transfer files themselves. db = dataset.connect('sqlite:///mydatabase.db') table = db['files'] flag = False #Boolean flag to check if a push even needs to occur for filed in db['files']: if check_if_changed(filed) == (True, True) or force: table.delete(filepath=filed['filepath']) table.insert(create_dict(filed['filepath'])) flag = True elif check_if_changed(filed) == (False, False): # File has been deleted locally table.delete(filepath=filed['filepath']) flag = True db.commit() if flag or force: result = db['files'].all() dataset.freeze(result, format='json', filename='files.json') with open("files.json", "rb") as filed: json_data = json.load(filed) click.echo(json_data) try: r = requests.get(serverurl, json=json_data, headers={'Content-Type': 'application/json'}) except Exception as e: print e for filed in db['files']: f = filed['filepath'] args = [ "-avz", f, "[email protected]:/home/karm/datafiles/", "--relative" ] p = Popen(['rsync'] + args, shell=False) print p.wait() else: click.echo("No files have been changed to push") return
def export_from_script(self, table):
    """
    Exports the table provided.
    :param table: name of the DB table to freeze to data/data.csv
    :return: None on success; the caught Exception object on freeze failure
    """
    query = self.db[table]
    print(query)
    # NOTE(review): `if not query` tests the truthiness of a dataset table
    # object, not whether the table has rows — confirm this guard does
    # what was intended.
    if not query:
        print("DB")
        raise InvalidQueryError("The DB Query is Invalid")
    else:
        print("Let")
        try:
            dataset.freeze(query, format='csv', filename='data/data.csv')
            print("Here")
        except Exception as error:
            print("Whe")
            # NOTE(review): the exception is returned rather than raised;
            # callers must inspect the return value to detect failure.
            return error
def freeze_all():
    """Export each dataset to exports/<name>/: parsed metadata JSON plus facts CSV."""
    out_base = 'exports'
    for ds, query in get_queries():
        try:
            path = os.path.join(out_base, ds['name'])
            #print [query]
            if not os.path.isdir(path):
                os.makedirs(path)
            ds_path = os.path.join(path, 'dataset.json')
            with open(ds_path, 'wb') as fh:
                # 'data' is stored as a JSON string; parse it first so the
                # dump nests it as an object rather than an escaped string.
                ds['data'] = json.loads(ds['data'])
                json.dump(ds, fh, default=json_default, indent=2)
            res = engine.query(query)
            dataset.freeze(res, filename='facts.csv', prefix=path, format='csv')
        # Python 2 except syntax: log and continue on per-dataset failure.
        except Exception, e:
            print e
def main():
    """Flatten the exercise01 SQLite schema into one CSV via a LEFT-JOIN query."""
    #Open Database Connection
    db = dataset.connect('sqlite:///exercise01.sqlite')
    #Examine tables and columns, print for reference
    # Note: Commented out, for initial run only
    # tables = db.tables
    # print(db.tables)
    # for table in tables:
    #     print(table, db[table].columns)

    # Select Query to flatten relationships in database
    # Note: removes duplicate IDs from lookup tables and renames attributes where necessary
    query = '''SELECT records.id, age, education_num, capital_gain,
        capital_loss, hours_week, over_50k,
        workclasses.name AS workclass,
        education_levels.name AS education_level,
        marital_statuses.name AS marital_status,
        occupations.name AS occupation,
        relationships.name AS relationship,
        races.name AS race,
        sexes.name AS sex,
        countries.name AS country
        FROM records
        LEFT JOIN workclasses ON workclasses.id = records.workclass_id
        LEFT JOIN education_levels ON education_levels.id = records.education_level_id
        LEFT JOIN marital_statuses ON marital_statuses.id = records.marital_status_id
        LEFT JOIN occupations ON occupations.id = records.occupation_id
        LEFT JOIN relationships ON relationships.id = records.relationship_id
        LEFT JOIN races ON races.id = records.race_id
        LEFT JOIN sexes ON sexes.id = records.sex_id
        LEFT JOIN countries ON countries.id = records.country_id
    '''
    # Execute query using dataset library
    result = db.query(query)
    #Note: Make exception handling, check for nulls and check for equal count
    # Export query results to csv using dataset library
    dataset.freeze(result, format='csv', filename='datafiles/Ex1_Flat_table.csv')
def get_photo_efforts():
    """
    Get did we touch it db combined with homepage db
    """
    db = dataset.connect(app_config.POSTGRES_URL)
    # Latest homepage scrape per story (max run_time), right-joined to the
    # effort spreadsheet so spreadsheet rows without a homepage hit survive
    # with a NULL url.
    result = db.query("""
        select s.duration, s.contribution, s.seamus_id, hp.url
        from homepage hp
        inner join (select story_id, max(run_time) as max_run_time
            from homepage group by story_id) hp2
        on hp.story_id = hp2.story_id and hp.run_time = hp2.max_run_time
        right join spreadsheet s on hp.story_id = s.seamus_id
    """)
    result_list = list(result)
    for row in result_list:
        # Idiom fix: compare against None with `is not`, not `!=` (PEP 8).
        row['on_homepage'] = row['url'] is not None
    dataset.freeze(result_list, format='csv',
                   filename='www/live-data/photo_efforts.csv')
def get_photo_efforts_fb():
    """
    Get did we touch it db combined with facebook db
    """
    db = dataset.connect(app_config.POSTGRES_URL)
    # Latest facebook row per link (max run_time), right-joined to the
    # effort spreadsheet so spreadsheet rows without a facebook post
    # survive with a NULL facebook_id.
    result = db.query("""
        select s.duration, s.contribution, s.seamus_id, f1.facebook_id
        from facebook f1
        inner join (select link_url, max(run_time) as max_run_time
            from facebook group by link_url) f2
        on f1.link_url = f2.link_url and f1.run_time = f2.max_run_time
        right join spreadsheet s on f1.seamus_id = s.seamus_id
    """)
    result_list = list(result)
    for row in result_list:
        # Idiom fix: compare against None with `is not`, not `!=` (PEP 8).
        row['on_facebook'] = row['facebook_id'] is not None
    dataset.freeze(result_list, format='csv',
                   filename='www/live-data/photo_efforts_fb.csv')
def freeze_all():
    """Export every dataset query to exports/<name>/: metadata JSON plus facts CSV."""
    out_base = 'exports'
    for ds, query in get_queries():
        try:
            # Record the query used for the export alongside the metadata.
            ds['export_query'] = query
            path = os.path.join(out_base, ds['name'])
            if not os.path.isdir(path):
                os.makedirs(path)
            ds_path = os.path.join(path, 'dataset.json')
            with open(ds_path, 'wb') as fh:
                json.dump(ds, fh, default=json_default, indent=2)
            res = engine.query(query)
            dataset.freeze(res, filename='facts.csv', prefix=path, format='csv')
        # Python 2 except syntax: log and continue so one failed dataset
        # does not abort the whole export run.
        except Exception, e:
            print e
def get_seamus_verticals():
    """
    gets all seamus content by vertical and writes to csv
    """
    db = dataset.connect(app_config.POSTGRES_URL)
    # Per-slug story counts, split by whether the story carries audio.
    result = db.query("""
        select distinct(s.slug), count(s.story_id) as slug_count,
        count(case when s.has_audio then 1 else null END) as count_has_audio,
        count(case when s.has_audio then null else 1 END) as count_has_no_audio
        from seamus s
        group by s.slug
        order by slug_count desc
    """)
    result_list = list(result)
    # Publication date range, used to normalize counts to stories/day.
    max_result = list(db.query("""
        select max(publication_date) from seamus
    """)).pop(0)
    min_result = list(db.query("""
        select min(publication_date) from seamus s
    """)).pop(0)
    difference = max_result['max'] - min_result['min']
    total_sum = list(db.query("""
        select count(story_id) from seamus s
    """)).pop(0)
    for row in result_list:
        # NOTE(review): assumes the publication range spans at least one
        # full day; difference.days == 0 would raise ZeroDivisionError.
        avg = row['slug_count'] / float(difference.days)
        row['stories_per_day'] = avg
        percent_of_total = row['slug_count'] / float(total_sum['count']) * 100
        row['percent_of_total'] = percent_of_total
    dataset.freeze(result_list, format='csv',
                   filename='www/live-data/seamus_summary.csv')
def get_raw_insights():
    """
    gets insights and art and writes csv
    """
    db = dataset.connect(app_config.POSTGRES_URL)
    # Latest facebook-insights row per link (max run_time), joined to the
    # Seamus story metadata by canonical URL.
    result = db.query("""
        select f1.*, s.has_lead_art, s.lead_art_provider, s.lead_art_url,
        s.lead_art_root_url, s.title, s.story_id, s.slug, s.has_audio
        from facebook f1
        inner join (select link_url, max(run_time) as max_run_time
            from facebook group by link_url) f2
        on f1.link_url = f2.link_url and f1.run_time = f2.max_run_time
        join seamus s on f1.link_url = s.canonical_url
    """)
    result_list = list(result)
    for row in result_list:
        # Derived columns computed in Python from each joined row.
        row['provider_category'] = _get_provider_category(row)
        row['provider_type'] = _get_provider_type(row)
        row['post_url'] = _make_post_url(row)
    dataset.freeze(result_list, format='csv',
                   filename='www/live-data/insights_raw.csv')
def summarize():
    """Compute per-region sums, per-capita values and percent changes (1965 vs 2015)."""
    table_1965_raw = agate.Table.from_csv('processed-data/1965.csv', COLUMNS)
    table_2015_raw = agate.Table.from_csv('processed-data/2015.csv', COLUMNS)
    for region, states, population in REGIONS:
        # Restrict both years to the states belonging to this region.
        table_1965 = table_1965_raw.where(lambda row: row['state'] in states)
        table_2015 = table_2015_raw.where(lambda row: row['state'] in states)
        output = []
        # First two COLUMNS entries are identifiers; summarize the rest.
        for col_name, col_type in COLUMNS[2:]:
            row = OrderedDict()
            row['var'] = col_name
            row['1965'] = table_1965.columns[col_name].aggregate(agate.Sum())
            row['1965_per_capita'] = row['1965'] / population['1965']
            row['2015'] = table_2015.columns[col_name].aggregate(agate.Sum())
            # NOTE(review): 2015 sums are normalized by the 2014 population
            # figure — presumably the latest available; confirm.
            row['2015_per_capita'] = row['2015'] / population['2014']
            row['absolute_percent_change'] = (row['2015'] - row['1965']) / row['1965']
            row['per_capita_percent_change'] = (row['2015_per_capita'] -
                row['1965_per_capita']) / row['1965_per_capita']
            output.append(row)
        dataset.freeze(output, format='csv',
                       filename='processed-data/{0}-sums.csv'.format(region))
def export(self):
    """Dump the tracks, artists and albums tables to JSON files next to
    the database (filenames are db_path + '<table>.json')."""
    for table_name in ('tracks', 'artists', 'albums'):
        rows = self.db[table_name].all()
        dataset.freeze(rows, format='json',
                       filename=self.db_path + table_name + '.json')
def save_data(tablename: str,
              results: List[Dict[str, Any]],
              taskname: str,
              timestamp: Union[arrow.Arrow, datetime, None] = None,
              output_format: str = "csv"):
    """
    Saves a dataset result set to a suitable output file.

    The filename is built from the task name, a filesystem-safe timestamp,
    and the table name. If timestamp is None, the current UTC time is used.

    output_format can be one of: csv, json, tabson (see
    https://dataset.readthedocs.org/en/latest/api.html#dataset.freeze)
    """
    if timestamp is None:
        timestamp = datetime.utcnow()
    filename = "{taskname}_{datetime}_{tablename}.{output_format}".format(
        taskname=taskname,
        tablename=tablename,
        datetime=timestamp.strftime(FILENAME_SAFE_ISOFORMAT),
        output_format=output_format
    )
    # BUG FIX: the template previously never interpolated {filename} even
    # though it was passed as a format argument, so the log line omitted
    # the destination path.
    log.info("Saving {tablename} data to {filename}".format(
        tablename=tablename, filename=filename))
    dataset.freeze(results, format=output_format, filename=filename)
    if not os.path.isfile(filename):
        # dataset.freeze writes nothing for an empty result set.
        log.error(
            "save_data: file {} not created; empty results?".format(filename))
def summarize_question(question_id):
    """ Summarize responses for a given question ID.

    Tallies responses per country, then writes one row per country with
    the total, each response count, and each response's share of the total
    to output/<question_id>.csv.
    """
    question, result = query(question_id)
    # Parenthesized print works identically under Python 2 and 3.
    print('{0}: {1}'.format(question_id, question['label']))

    counts = OrderedDict()
    for row in result:
        # Membership test on the dict directly (no .keys() needed).
        if row['country'] not in counts:
            counts[row['country']] = initialize_counts(question_id)
        counts[row['country']][row['response']] += 1

    output = []
    for country, values in counts.items():
        output_row = OrderedDict((('country', country), ))
        # total is >= 1 here: a country only gets a counts entry after at
        # least one of its responses has been tallied.
        total = sum(int(value) for value in values.values())
        output_row['total_responses'] = total
        for label, value in values.items():
            output_row[label] = value
            # Strip non-ASCII from labels so the pct column header is safe.
            pct_label = '{0} pct'.format(
                label.encode('ascii', 'ignore').decode('ascii'))
            output_row[pct_label] = float(value) / total
        output.append(output_row)

    dataset.freeze(output, format='csv',
                   filename='output/{0}.csv'.format(question_id))
def summarize_population_estimates():
    """Write one CSV of yearly population estimates per metro parish."""
    for county in METRO_PARISHES:
        out_path = 'output/population-summary/{0}.csv'.format(slugify(county))
        # NOTE: county names come from the METRO_PARISHES constant, so the
        # interpolation below is not fed untrusted input.
        rows = db.query("""
            select year, total, white, black, hispanic, asian,
                american_indian, native_hawaiian, two_or_more
            from population_estimates
            where county='{0}'
            order by year
        """.format(county))
        dataset.freeze(rows, format='csv', filename=out_path)
def freeze_data():
    """Freeze dimension values and every cube to files under the output dir.

    Dimension values are written once as CSV and once as JSON; each cube is
    written twice as CSV: with readable labels and with raw codes.
    """
    # Parenthesized print is valid and identical in Python 2 and 3
    # (fixes the Python-2-only print statements).
    print("Freezing dimension values...")
    prefix = os.path.join(get_output_dir(), 'data', 'dimensions')
    freeze(value_table.all(), prefix=prefix,
           filename='{{dimension_name}}.csv', format='csv')
    freeze(value_table.all(), prefix=prefix,
           filename='{{dimension_name}}.json', format='json')

    print("Freezing cubes...")
    for cube in get_cubes():
        prefix = os.path.join(get_output_dir(), 'data',
                              cube['statistic_name'], cube['cube_name'])
        slug = slugify(cube['statistic_title_de'])
        # Freeze each cube twice: human-readable labels and raw codes.
        for (text, rb) in [('labeled', True), ('raw', False)]:
            q, ps = query_cube(cube['cube_name'], readable=rb)
            fn = '%s-%s-%s.csv' % (slug, cube['cube_name'], text)
            print([fn])
            freeze(engine.query(q), prefix=prefix, filename=fn)
def write_aliases(table):
    """Freeze the given dataset table to <DATA_PATH>/<table name>.csv."""
    csv_name = "%s.csv" % table.table.name
    out_path = os.path.join(DATA_PATH, csv_name)
    dataset.freeze(table, filename=out_path, format="csv")
import dataset

# Connect to the reddit database on RDS.
# NOTE(review): the credentials are masked here but appear to have been
# hard-coded in the DSN; they should come from an environment variable or
# a config file, not source code.
db = dataset.connect('postgresql://*****:*****@testdb.cy2ub2trrp92.us-east-1.rds.amazonaws.com:5432/reddit')

# Dump every row of the 'entries' table to a local CSV snapshot.
result = db['entries'].all()
dataset.freeze(result, format='csv', filename='entries.csv')
def run_report(event=None):
    """Dump all rows from the rules table to a timestamped CSV report and
    show a confirmation dialog.

    Intended as a Tk event handler; returns "break" to stop further event
    propagation.
    """
    report_result = rules.all()
    report_name = datetime.now().strftime("Report_%m%d%y_%H%M%S.csv")
    dataset.freeze(report_result, format='csv', filename=report_name)
    # Fixed user-facing typo in the dialog title ("Succes" -> "Success").
    tkMessageBox.showinfo("Success", "Report saved!")
    return "break"