def main():
    today = datetime.date.today()
    # subtract 6 days since the date range includes both boundaries
    start_date = today - datetime.timedelta(6)
    data = fd.fetch_data(str(start_date), str(today))
    insertToDB(data)
    print("Successfully updated data to database")
def getRadvizPoints(self, session, filterByTerm):
    es_info = self._esInfo(session['domainId'])
    index = es_info['activeDomainIndex']
    max_features = 200
    #session['pagesCap'] = 12
    if session.get('from') is None:
        session['from'] = 0
    format = '%m/%d/%Y %H:%M %Z'
    if not session.get('fromDate') is None:
        session['fromDate'] = long(DomainModel.convert_to_epoch(
            datetime.strptime(session['fromDate'], format)))
    if not session.get('toDate') is None:
        session['toDate'] = long(DomainModel.convert_to_epoch(
            datetime.strptime(session['toDate'], format)))

    results_data = self.getTextQuery(session)
    ddteval_data = fetch_data(results_data["results"], es_doc_type=es_doc_type, es=es)
    data = ddteval_data["data"]
    labels = ddteval_data["labels"]
    urls = ddteval_data["urls"]

    tf_v = tf_vectorizer(convert_to_ascii=True, max_features=max_features)
    [X, features] = tf_v.vectorize(data)
    matrix_transpose = np.transpose(X.todense())

    print "\n\n Number of 1-gram features = ", len(features)
    print "\n\n tf 1-gram matrix size = ", np.shape(X)

    # data = self.radviz.loadData_pkl("data/ht_data_200.pkl").todense()
    # data = np.transpose(data)
    # features = self.radviz.loadFeatures("data/ht_data_features_200.csv")
    # print features
    # print len(features)
    # labels = self.radviz.loadLabels("data/ht_data_labels_200.csv")
    # urls = self.radviz.loadSampleNames("data/ht_data_urls_200.csv")

    self.radviz = Radviz(X, features, labels, urls)

    return_obj = {}
    for i in range(0, len(features)):
        return_obj[features[i]] = matrix_transpose[i, :].tolist()[0]

    labels_urls = OrderedDict([("labels", labels), ("urls", urls),
                               ("title", ddteval_data["title"]),
                               ("snippet", ddteval_data["snippet"]),
                               ("image_url", ddteval_data["image_url"])])
    od = OrderedDict(list(OrderedDict(sorted(return_obj.items())).items()) +
                     list(labels_urls.items()))

    return od
def index():
    form = FilterForm()
    success = True
    if form.is_submitted():
        data, headers = fetch_data(form)
        if len(data) != 0:
            return render_template("search.html", data=data, headers=headers)
        else:
            success = False
    return render_template("index.html", form=form, success=success)
def nessus_data_clone():
    end_date = datetime.datetime.today()
    start_date = end_date - datetime.timedelta(days=1)
    # 1950-01-01 is an old enough lower bound for the bulk fetch and store;
    # compare with > so the loop terminates even though start_date carries a time-of-day component
    while start_date > datetime.datetime(1950, 1, 1):
        data = fetch_data(start_date, end_date)
        store_data(data)
        del data
        end_date = start_date
        start_date = end_date - datetime.timedelta(days=1)
def main():
    data = fetch_data()

    warp = LightFM(loss='warp')
    logistic = LightFM(loss='logistic')
    bpr = LightFM(loss='bpr')

    warp.fit(data['matrix'], epochs=30, num_threads=2)
    logistic.fit(data['matrix'], epochs=30, num_threads=2)
    bpr.fit(data['matrix'], epochs=30, num_threads=2)

    print('Using the WARP loss function:')
    recommendation(model=warp, data=data['matrix'], users=[20, 23, 50])
    print('\n')

    print('Using the logistic loss function:')
    recommendation(model=logistic, data=data['matrix'], users=[20, 23, 50])
    print('\n')

    print('Using the BPR loss function:')
    recommendation(model=bpr, data=data['matrix'], users=[20, 23, 50])
    print('\n')
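# Note: the recommendation() helper used above is not defined in this snippet.
# A minimal sketch of what it might look like follows; it assumes data['matrix']
# is a SciPy sparse user-item interaction matrix and that the helper simply ranks
# every item for a user by the model's predicted score. The body and the top_n
# parameter are illustrative assumptions, not the original implementation.
import numpy as np

def recommendation(model, data, users, top_n=3):
    n_users, n_items = data.shape
    for user_id in users:
        # LightFM's predict() scores the given items for one user
        scores = model.predict(user_id, np.arange(n_items))
        top_items = np.argsort(-scores)[:top_n]
        print('User %d, top recommended item ids: %s' % (user_id, top_items.tolist()))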
for j in range(len(manhattan_polygon[i])):
    manhattan_polygon[i][j] = float(manhattan_polygon[i][j])
manhattan_polygon[i] = tuple(manhattan_polygon[i])
poly_file.close()

# Set time window for SQL query
DELTA = timedelta(seconds=30)
# A dummy date is used so that timedeltas can be added to and subtracted from times
INTERVAL_START = datetime(2013, 1, 1, 12, 0, 0)
INTERVAL_END = datetime(2013, 1, 1, 13, 59, 59)
start_datetime = datetime(2013, 1, 1, 12, 0, 0)
end_datetime = datetime(2013, 1, 1, 12, 0, 1)
i = 0

# Open the CSV file to which taxi travel times will be written
outputFile = open("manhattan_rides.csv", "w")

while end_datetime <= INTERVAL_END and i < 1:
    queryString = (
        "SELECT pickup_datetime,dropoff_datetime,pickup_longitude,pickup_latitude,"
        "dropoff_longitude,dropoff_latitude "
        "FROM [833682135931:nyctaxi.trip_data] "
        "WHERE (FLOAT(pickup_longitude) BETWEEN -74.0382 AND -73.9030) "
        "AND (FLOAT(dropoff_longitude) BETWEEN -74.0382 AND -73.9030) "
        "AND (FLOAT(pickup_latitude) BETWEEN 40.6780 AND 40.8860) "
        "AND (FLOAT(dropoff_latitude) BETWEEN 40.6780 AND 40.8860) "
        "AND (TIME(dropoff_datetime) BETWEEN TIME('" + str(start_datetime) + "') "
        "AND TIME('" + str(INTERVAL_END) + "')) "
        "AND (TIME(pickup_datetime) BETWEEN TIME('" + str(start_datetime) + "') "
        "AND TIME('" + str(end_datetime) + "'))")
    results = fetch_data(queryString)
    for row in results:
        if (point_in_poly(float(row[3]), float(row[2]), manhattan_polygon) and
                point_in_poly(float(row[5]), float(row[4]), manhattan_polygon)):
            travel_time = (datetime.strptime(row[1], "%Y-%m-%d %H:%M:%S") -
                           datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S"))
            outputFile.write(str(travel_time.total_seconds()) + ",")
            outputFile.write(",".join(row[2:6]))
            outputFile.write('\n')
    start_datetime += DELTA
    end_datetime += DELTA
    i += 1

outputFile.close()
import sys
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM
from fetch_data import fetch_data

# CHALLENGE part 1 of 3 - write your own fetch and format method for a different recommendation
# dataset. Here are a good few: https://gist.github.com/entaroadun/1653794
# Also take a look at the fetch_movielens method to see what it's doing
# data = fetch_data()

# fetch data and format it
data = fetch_movielens(min_rating=4.0)

# print training and testing data
#print(repr(data['train']))
#print(data['train'])
#print(repr(data['test']))

# CHALLENGE part 2 of 3 - use 3 different loss functions (so 3 different models), compare results,
# and print results for the best one. Available loss functions are warp, logistic, bpr, and warp-kos.

# create model
model = LightFM(loss='warp')
#model2 = LightFM(loss='logistic')
#model3 = LightFM(loss='bpr')
#model4 = LightFM(loss='warp-kos')

# train model
model.fit(data['train'], epochs=30, num_threads=2)
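# One possible way to tackle CHALLENGE part 2 (comparing loss functions) is to train
# one model per loss and score each with precision_at_k from lightfm.evaluation.
# The compare_losses() helper, the loss list, and the choice of k=5 below are
# illustrative assumptions, not part of the original script.
from lightfm.evaluation import precision_at_k

def compare_losses(train, test, losses=('warp', 'logistic', 'bpr'), k=5):
    results = {}
    for loss in losses:
        m = LightFM(loss=loss)
        m.fit(train, epochs=30, num_threads=2)
        # mean precision@k over all test users
        results[loss] = precision_at_k(m, test, k=k).mean()
    return results

# e.g. best = max(compare_losses(data['train'], data['test']).items(), key=lambda kv: kv[1])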
def task(url, p):
    fetch_data.fetch_data(url)
    time.sleep(1)
    p.addThread()
from markov_python.cc_markov import MarkovChain
from fetch_data import fetch_data

lyrics = fetch_data()
mc = MarkovChain()
for song in lyrics:
    mc.add_string(song)

print mc

"""
text = mc.generate_text(400)
for i in range(0, 400, 5):
    for j in text[i:i+5]:
        print j,
    print ""
    if i % 4 == 0:
        print "\n"
"""
# Map state names to FIPS state codes
map_state = {
    "NAME": "state", "Alabama": "01", "Alaska": "02", "Arizona": "04", "Arkansas": "05",
    "California": "06", "Colorado": "08", "Connecticut": "09", "Delaware": "10",
    "District of Columbia": "11", "Florida": "12", "Georgia": "13", "Hawaii": "15",
    "Idaho": "16", "Illinois": "17", "Indiana": "18", "Iowa": "19", "Kansas": "20",
    "Kentucky": "21", "Louisiana": "22", "Maine": "23", "Maryland": "24",
    "Massachusetts": "25", "Michigan": "26", "Minnesota": "27", "Mississippi": "28",
    "Missouri": "29", "Montana": "30", "Nebraska": "31", "Nevada": "32",
    "New Hampshire": "33", "New Jersey": "34", "New Mexico": "35", "New York": "36",
    "North Carolina": "37", "North Dakota": "38", "Ohio": "39", "Oklahoma": "40",
    "Oregon": "41", "Pennsylvania": "42", "Rhode Island": "44", "South Carolina": "45",
    "South Dakota": "46", "Tennessee": "47", "Texas": "48", "Utah": "49", "Vermont": "50",
    "Virginia": "51", "Washington": "53", "West Virginia": "54", "Wisconsin": "55",
    "Wyoming": "56", "Puerto Rico": "72"
}
r_crit = map_state[r_crit_state]

# Import modules
sys.path.append(os.path.split(os.path.realpath(__file__))[0])
import fetch_data as fd           # This module fetches data from the Census Bureau
fd = reload(fd)                   # Make sure the newest module is loaded
import construct_deathdata as cd  # This module calculates rates from input data and fetched population data
cd = reload(cd)                   # Make sure the newest module is loaded
import data_filter as df          # This module filters the result based on input
df = reload(df)                   # Make sure the newest module is loaded

# Call the fetch_data function in the fd module. It returns the population matrix for each
# geographic unit and the age structure (percentage of each age group).
[r_note_col, result, percent] = fd.fetch_data(base_year, r_crit_level, r_crit, r_year,
                                              r_geolevel, age_structure)

if partial_data == 'TRUE':
    filt_dict = df.build_filt_dict(inputdata, id_field)
    [result, r_note_col] = df.filter_with_dict(result, r_note_col, "GEOID", filt_dict)

# Write the population matrix and the standard population structure into files
f = open(outputfolder + "\\" + "PopAge_structure_" + r_crit_level + r_crit + ".csv", "w")
head = True
for row in result:
    if head:
        headerline = row
        head = False
    temp_text = cd.vect_to_str(row)
    f.write(temp_text + "\n")
f.close()
def pg2csv(database, subject_id, subject_id_hashed, data_root_dir, probe_info,
           runtype, server_address, usr, pwd, time_start, time_end):
    #with open(subjects_info) as csvfile:
    #    subject_info = csv.reader(csvfile, delimiter=';', quotechar='|')
    #    for row in subject_info:
    #        if len(row) == 2:
    #            if row[0] == subject:
    #                subject_id = row[1]
    #                #year_start = int(row[2])
    #                #month_start = int(row[3])
    #                #day_start = int(row[4])
    #                #hour_start = int(row[5])
    #                #minute_start = int(row[6])
    #                #year_end = int(row[7])
    #                #month_end = int(row[8])
    #                #day_end = int(row[9])
    #                #hour_end = int(row[10])
    #                #minute_end = int(row[11])
    #        else:
    #            subject_id = subject
    #print subject_id

    # Directory to put the extracted data in:
    dirname = data_root_dir + subject_id
    if os.path.exists(dirname):
        shutil.rmtree(dirname)
    os.makedirs(dirname)

    # Set roughly the start and the end of the data timestamps
    #start_all = datetime.datetime(year_start, month_start, day_start, hour_start, minute_start, 0)
    #end_all = datetime.datetime(year_end, month_end, day_end, hour_end, minute_end, 59)
    # Convert to unix timestamps (seconds):
    #start_all_ts = start_all.strftime('%s')
    #end_all_ts = end_all.strftime('%s')
    import time
    start_all_ts = time.mktime(time_start.timetuple())
    end_all_ts = time.mktime(time_end.timetuple())

    # Read the probes info
    probes = []
    with open(probe_info) as f:
        reader = csv.reader(f, delimiter=';', quoting=csv.QUOTE_NONE)
        for row in reader:
            if row[0] == '1':
                probes.append(row[1:len(row)])

    # Extract timestamps for data samples:
    #   'start': timestamps for the start of each trial (x: class; y: trial number)
    #   'end':   timestamps for the end of each trial (x: class; y: trial number)
    # Numbers are stored in milliseconds here since the sensor timestamps are in ms.
    if runtype == 'trial':
        triggers = fetch_data(database, subject_id_hashed, 'ActivityLog', 'FEATURE_VALUE',
                              'timestamp', start_all_ts, end_all_ts, False,
                              server_address, usr, pwd)
        start = []
        end = []
        for row in triggers:
            if row[1] == 'start':
                start.append(row[0])
            elif row[1] == 'end':
                end.append(row[0])
        if len(start) != len(end):
            print('Start and End triggers are inconsistent!')
            sys.exit(1)
    elif runtype == 'all':
        start = [float(start_all_ts)]
        end = [float(end_all_ts)]
    else:
        print('Unknown Runtype ' + runtype + '!')
        sys.exit(1)

    print('')
    with open('log_python.txt', 'a') as logfile:
        logfile.write('\n')

    # Cut-off time in milliseconds at the beginning and the end
    clip_begin = 0
    clip_end = 0

    num_trials = len(start)
    for probe in probes:
        duplicate_timestamps = 0
        empty_entry = 0
        for j in range(num_trials):
            data = []
            # Set the start and end timestamps for each trial
            t1 = start[j] + clip_begin
            t2 = end[j] - clip_end
            # Convert t1 and t2 for probes whose timestamps are in milliseconds
            if probe[4] == 'ms':
                t1 = float(t1 * 1000.0)
                t2 = float(t2 * 1000.0)
            # Convert t1 and t2 for probes whose timestamps are in nanoseconds
            if probe[4] == 'ns':
                t1 = float(t1 * 1000000000.0)
                t2 = float(t2 * 1000000000.0)
            #print(probe)
            data_temp = fetch_data(database, subject_id_hashed, probe[0], probe[2], probe[3],
                                   t1, t2, False, server_address, usr, pwd)
            if not data_temp:
                #print('\033[93m' + 'PG2CSV: There is no data for probe \'' + probe[1] + '\'' + '\033[0m')
                msg = 'Subject ' + subject_id + ': There is no data for probe \'' + probe[1] + '\''
                print(msg)
                with open('log_python.txt', 'a') as logfile:
                    logfile.write(msg + '\n')
                continue

            num_columns = len(data_temp[0])
            for k in range(len(data_temp)):
                # The first column is the timestamp
                if probe[4] == 's':
                    time = float('%.6f' % (deepcopy(data_temp[k][0])))
                elif probe[4] == 'ms':
                    time = float('%.6f' % (deepcopy(data_temp[k][0]) / 1000.0))
                else:
                    time = float('%.6f' % (deepcopy(data_temp[k][0]) / 1000000000.0))
                # Save a data sample only when it differs from the previous sample
                # (works around a PR/PostgreSQL communication bug)
                if len(data) != 0 and time == data[-1][0] and probe[5] == 'R':
                    duplicate_timestamps = duplicate_timestamps + 1
                if len(data) == 0 or time != data[-1][0] or probe[5] == 'N':
                    data_row = [time]
                    for kk in range(num_columns - 1):
                        if not str(data_temp[k][kk + 1]):
                            data_row.append('-99')
                            empty_entry = empty_entry + 1
                        else:
                            data_row.append(deepcopy(data_temp[k][kk + 1]))
                    #data_row.append(class_label)
                    #data_row.append(location_label)
                    data.append(data_row)

            if empty_entry > 0:
                msg = ('Subject ' + subject_id + ': ' + str(empty_entry) +
                       ' empty entries for probe \'' + probe[1] + '\' replaced with \'-99\'')
                print(msg)
                with open('log_python.txt', 'a') as logfile:
                    logfile.write(msg + '\n')
            if duplicate_timestamps > 0:
                msg = ('Subject ' + subject_id + ': ' + str(duplicate_timestamps) + '/' +
                       str(len(data_temp)) + ' duplicate timestamps for probe \'' +
                       probe[1] + '\' removed')
                print(msg)
                with open('log_python.txt', 'a') as logfile:
                    logfile.write(msg + '\n')

            # Dump the gathered samples
            if runtype == 'trial':
                filename = dirname + '/' + probe[1] + '_trial%d.csv' % (j)
            else:
                filename = dirname + '/' + probe[1] + '.csv'
            with open(filename, 'w') as csvfile:
                spamwriter = csv.writer(csvfile, delimiter='\t', quotechar='|',
                                        quoting=csv.QUOTE_MINIMAL)
                for i in range(len(data)):
                    spamwriter.writerow(data[i])
def api(ticker):
    return fetch_data(ticker)
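# If api() is meant to back a web endpoint, one way to wire it up is with a Flask
# route. The route path, the separate api_route name, and the assumption that
# fetch_data(ticker) returns a JSON-serializable object are illustrative guesses,
# not part of the original code.
from flask import Flask, jsonify

app = Flask(__name__)

@app.route("/api/<ticker>")
def api_route(ticker):
    return jsonify(fetch_data(ticker))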
def main(args):
    data = fetch_data(args)
    print data.shape

    data = calculate_pairs(data, mean)
    print data.tail()
    print data.columns
from cc_markov import MarkovChain
from fetch_data import fetch_data

url = 'http://songmeanings.com/songs/view/3530822107859540342/'
text = fetch_data(url)

# MarkovChain methods are instance methods, so build an instance before adding text
mc = MarkovChain()
mc.add_string(text)
result = mc.generate_text()
print(result)
""" script to fetch data from gateway and upload to influxdb """ import io from fetch_data import fetch_data from dump_to_influx import dump_to_influx buffer = io.BytesIO() for data in fetch_data(): buffer.write(data) buffer.seek(0) dump_to_influx(io.TextIOWrapper(buffer))