def get_relax_timestamps(self, return_indexes=False):
    """Returns times of each relax time frame start-end.

    :param return_indexes: True - return index in array; False - return actual time in array.
    :return: Array of tuples, where each tuple presents beginning and end of a relax time frame.
    """
    end_time = self.get_end_time_cognitive_load_study()
    relax_timestamps = self.get_relax_timestamps_from_file()
    data = [self.time, self.phase]
    timestamps = []
    for i in range(0, len(relax_timestamps) - 1, 2):
        relax_start = relax_timestamps[i]
        relax_stop = relax_timestamps[i + 1]
        # epoch timestamps are in milliseconds; convert the offsets to seconds
        difference1 = (end_time - relax_start) / 1000
        difference2 = (end_time - relax_stop) / 1000
        # map the offsets onto the recorded time axis, measured back from the last sample
        start_on_data = data[0][-1] - difference1
        stop_on_data = data[0][-1] - difference2
        if return_indexes:
            start_on_data = bisect(data[0], start_on_data)
            stop_on_data = bisect(data[0], stop_on_data)
        timestamps.append((start_on_data, stop_on_data))
    return timestamps
def get_data_task_timestamps(self, return_indexes=False):
    """Returns times of each task time frame start-end.

    :param return_indexes: True - return index in array; False - return actual time in array.
    :return: Array of tuples, where each tuple presents beginning and end of a task time frame.
    """
    data = [self.time, self.phase]
    end_time = self.get_end_time_cognitive_load_study()
    cognitive_study_results = self.read_cognitive_load_study(self.ident + '-primary-extract.txt')
    timestamps = []
    for i in range(len(cognitive_study_results)):
        task_1_start = cognitive_study_results['start_time'][i]
        task_1_length = cognitive_study_results['time_on_task'][i]
        # epoch timestamps are in milliseconds; convert the offset to seconds
        difference = (end_time - task_1_start) / 1000
        start_on_data = data[0][-1] - difference
        end_on_data = start_on_data + task_1_length / 1000
        if return_indexes:
            start_on_data = bisect(data[0], start_on_data)
            end_on_data = bisect(data[0], end_on_data)
        timestamps.append((start_on_data, end_on_data))
    return timestamps
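# A minimal, self-contained sketch of the mapping used by the two methods above:
# an epoch timestamp in milliseconds is turned into an offset from the end of the
# recording (in seconds) and then, optionally, into an index on the sorted time
# axis with bisect. All names and values below are illustrative, not part of the
# original DataReader class.
from bisect import bisect

time_axis = [0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]  # seconds since recording start
end_epoch_ms = 1600000003000                      # epoch time (ms) at the last sample
event_epoch_ms = 1600000001250                    # epoch time (ms) of some event

offset_s = (end_epoch_ms - event_epoch_ms) / 1000  # 1.75 s before the end of the recording
event_on_data = time_axis[-1] - offset_s           # 1.25 s on the local time axis
event_index = bisect(time_axis, event_on_data)     # 3 -> first sample after the event
print(event_on_data, event_index)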
def monochromatic(colours):
    # print(colours)
    hue_vals = [-10, -5, -2, 15, 45, 65, 165, 180, 265, 300, 340, 360]
    colour_vals = '0GROYGCBPVR'
    curr_colour = None
    for colour in colours:
        H, S, L = colour
        # HSL components arrive normalised to 0-1; rescale to degrees / percent
        H = H * 360
        S = S * 100
        L = L * 100
        if S <= 10:
            # near-grey colours get a sentinel hue so they all fall in the same bucket
            H = -3
        if L <= 10:
            continue  # ignore near-black
        if L > 97:
            continue  # ignore near-white
        if curr_colour is None:
            curr_colour = bisect(hue_vals, H)
        elif curr_colour != bisect(hue_vals, H):
            return False
    return True
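# Illustrative check of monochromatic() with made-up HSL tuples (components in the
# 0-1 range, as the function expects). Two reddish hues land in the same bisect
# bucket, while adding a blue hue does not.
reds = [(0.00, 0.80, 0.50), (0.02, 0.90, 0.45)]
mixed = reds + [(0.60, 0.80, 0.50)]
print(monochromatic(reds))   # True  - both hues fall in the same hue interval
print(monochromatic(mixed))  # False - 216 degrees lands in a different interval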
def __init__(self, filename, min_len=3):
    with open(filename) as handle:
        lines = handle.read().upper().split()
    self.words = [word for word in lines if len(word) >= min_len]
    self.words.sort()
    self.bounds = {}
    for c in ALPHABET:
        # words starting with c sit between bisect(words, c) and bisect(words, next letter)
        c2 = chr(ord(c) + 1)
        self.bounds[c] = (bisect.bisect(self.words, c),
                          bisect.bisect(self.words, c2))
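# Hedged sketch of how the bounds computed above can be used: the half-open index
# range selects exactly the words that start with a given letter. ALPHABET and the
# sample word list are assumptions made for this illustration.
import bisect

ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

words = sorted(['APPLE', 'AXE', 'BEAR', 'CAT'])
lo = bisect.bisect(words, 'A')  # first word ranked after the bare letter 'A'
hi = bisect.bisect(words, 'B')  # first word ranked after the bare letter 'B'
print(words[lo:hi])             # ['APPLE', 'AXE'] - every word starting with 'A'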
def organize(sizs,
             breakpoints=[1000, 2000, 3000, 4000, 5000, 6000,
                          7000, 8000, 9000, 10000, 11000],
             specs='ABCDEFGHIJKL'):
    # bisect counts how many breakpoints the size has reached, which is
    # exactly the index of the matching spec letter
    i = bisect(breakpoints, sizs)
    return specs[i]
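# Quick illustrative calls (values made up): sizes map to spec letters via the
# number of breakpoints they reach.
print(organize(500))    # 'A'  - below every breakpoint
print(organize(1000))   # 'B'  - equal to a breakpoint counts as passing it (bisect_right)
print(organize(2500))   # 'C'
print(organize(11500))  # 'L'  - above every breakpoint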
def numSmallerByFrequency(self, queries, words):
    # f(s) = frequency of the lexicographically smallest character in s
    freq, ans = [], []
    for word in words:
        c = Counter(word)
        freq.append(c[min(c.keys())])
    freq.sort()
    n = len(freq)
    for query in queries:
        c = Counter(query)
        # count the words whose f value is strictly greater than f(query)
        ans.append(n - bisect(freq, c[min(c.keys())]))
    return ans
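# Illustrative call (sample data in the spirit of LeetCode 1170; the Solution
# wrapper class is assumed): f("cbd") = 1 ('b' appears once) and f("zaaaz") = 3
# ('a' appears three times), so exactly one word has a strictly larger f value.
print(Solution().numSmallerByFrequency(["cbd"], ["zaaaz"]))  # [1]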
def kEmptySlots(self, flowers, k):
    from bisect import bisect
    active = []
    for day, flower in enumerate(flowers, 1):
        # index where to insert flower in the sorted list of already-blooming positions
        i = bisect(active, flower)
        # check the previous and next blooming positions for a gap of exactly k empty slots
        for neighbor in active[i - (i > 0):i + 1]:
            if abs(neighbor - flower) - 1 == k:
                return day
        active.insert(i, flower)
    return -1
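# Illustrative call (LeetCode 683-style input; the Solution wrapper class is
# assumed): flowers[i] is the position that blooms on day i+1. With
# flowers = [1, 3, 2] and k = 1, positions 1 and 3 are both blooming on day 2
# with exactly one empty slot (position 2) between them.
print(Solution().kEmptySlots([1, 3, 2], 1))  # 2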
from bisect import bisect

prm = primes3(10**6)  # primes3 is assumed to return a sorted list of primes below 10**6
print(prm[-1])


def isCube(x):
    t = int(x ** (1.0 / 3))
    # the integer cube root may be off by one because of floating-point rounding
    if t ** 3 == x or (t + 1) ** 3 == x:
        return True
    return False


count = 0
idx = bisect(prm, 100)
idx = len(prm)
lastprime = 0
for n in range(1, 1000):
    n = n ** 3
    N3 = n ** 3
    N2 = n ** 2
    pind = lastprime
    p = prm[pind]
    while p < prm[-1]:
        p = prm[pind]
        X3 = N2 * (n + p)
        if isCube(X3):
            count += 1
            print(p, n, X3 ** 0.33334, '\t', count)
        pind += 1  # step to the next prime so the loop terminates
def compare_extracted_hr_and_band(path, ident):
    """Compare heart rates acquired wirelessly and with Microsoft Band.

    :param path: (str) main path to data, where user data is located in specific folders
    :param ident: (str) user identifier
    :return: MAE, MSE, CORRelation values of the aligned HR time series
    """
    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    data = dataread.unwrap_grc_data()  # unwrap phase. returns time and y values
    samp_rate = round(len(data[1]) / max(data[0]))

    dataextract = dataextractor.DataExtractor(data[0], data[1], samp_rate)

    cog_res = dataread.read_cognitive_load_study(ident + '-primary-extract.txt')
    end_epoch_time = dataread.get_end_time_cognitive_load_study()  # end t

    extracted_br_features = dataextract.raw_windowing_breathing(30, 1)
    extracted_br_features['br_rate'] = np.array(extracted_br_features['br_rate'].rolling(6).mean())
    extracted_br_features_roll_avg = extracted_br_features.loc[:, extracted_br_features.columns != 'times'].rolling(
        6).mean()
    extracted_br_features_roll_avg['times'] = extracted_br_features['times']
    extracted_br_features_roll_avg['br_ok'] = extracted_br_features['br_ok']

    extracted_hr_features = dataextract.raw_windowing_heartrate(10, 1)
    extracted_hr_features = extracted_hr_features.drop(['hr_HRV_lf', 'hr_HRV_hf', 'hr_HRV_lf_hf'], axis=1)
    extracted_hr_features_roll_avg = extracted_hr_features.loc[:, extracted_hr_features.columns != 'times'].rolling(
        10).mean()
    extracted_hr_features_roll_avg['times'] = extracted_hr_features['times']
    extracted_hr_features_roll_avg['hr_ok1'] = extracted_hr_features['hr_ok']

    bandread = bandreader.HeartRateBand(path + '_Hrates/', ident)
    band_data = bandread.load()
    # clip the Band recording to the duration of the wireless recording (epoch times are in ms)
    band_data_time_start = bisect(band_data[0][:], end_epoch_time - data[0][-1] * 1000)
    band_data_time_stop = bisect(band_data[0][:], end_epoch_time)
    band_data = [band_data[0][band_data_time_start:band_data_time_stop],
                 band_data[1][band_data_time_start:band_data_time_stop]]
    band_data_new_data = [(band_data[0] - band_data[0][0]) / 1000, band_data[1]]

    plt.figure(1)
    plt.clf()
    plt.plot(extracted_hr_features_roll_avg['times'], extracted_hr_features_roll_avg['hr_rate'], color='orange',
             label='Wi-Mind heart rate')
    plt.plot(band_data_new_data[0], band_data_new_data[1], color='green', label='Microsoft Band heart rate')
    plt.xlabel('time (s)')
    plt.ylabel('heart rate')
    plt.legend()
    plt.show()

    hr_data = extracted_hr_features_roll_avg[['times', 'hr_rate']]
    hr_data['times'] = hr_data['times'].astype(int)

    band_data = pd.DataFrame()
    band_data['times'] = band_data_new_data[0]
    band_data['times'] = band_data['times'].astype(int)
    band_data['rate'] = band_data_new_data[1]
    band_data = band_data.drop_duplicates(subset=['times'])

    # align both series on whole-second timestamps before computing the error metrics
    together_data = pd.merge(hr_data, band_data, on='times')
    together_data = together_data.dropna()

    # new_hr = res_ind[intersect]
    # new_band = band_data_new__data[1][intersect]
    mae = metrics.mean_absolute_error(together_data['rate'], together_data['hr_rate'])
    mse = metrics.mean_squared_error(together_data['rate'], together_data['hr_rate'])
    corr = stats.pearsonr(together_data['rate'], together_data['hr_rate'])
    # print('mae and mse: ', mae, mse)

    return mae, mse, corr
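# A minimal sketch of the alignment-and-scoring step above, using only pandas,
# scikit-learn and scipy (the sensor-specific readers are not needed here). The
# numbers are made up; in the real function the two series come from the wireless
# extractor and the Microsoft Band.
import pandas as pd
from sklearn import metrics
from scipy import stats

wireless = pd.DataFrame({'times': [0, 1, 2, 3, 4], 'hr_rate': [71.0, 72.5, 73.0, 72.0, 71.5]})
band = pd.DataFrame({'times': [1, 2, 3, 4, 5], 'rate': [70.0, 74.0, 72.5, 71.0, 70.5]})

together = pd.merge(wireless, band, on='times').dropna()  # keep only the overlapping seconds
mae = metrics.mean_absolute_error(together['rate'], together['hr_rate'])
mse = metrics.mean_squared_error(together['rate'], together['hr_rate'])
corr = stats.pearsonr(together['rate'], together['hr_rate'])
print(mae, mse, corr)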
def full_signal_extract(path, ident):
    """Extract breathing and heartbeat features from one user and save features to file.

    :param path: (str) main path to data, where user data is located in specific folders
    :param ident: (str) user identifier
    :return: Nothing. It saves features (dataframe) to a .csv file
    """
    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    data = dataread.unwrap_grc_data()  # unwrap phase. returns time and y values
    samp_rate = round(len(data[1]) / max(data[0]))

    dataextract = dataextractor.DataExtractor(data[0], data[1], samp_rate)

    cog_res = dataread.read_cognitive_load_study(ident + '-primary-extract.txt')
    end_epoch_time = dataread.get_end_time_cognitive_load_study()  # end t

    extracted_br_features = dataextract.raw_windowing_breathing(30, 1)
    extracted_br_features['br_rate'] = np.array(extracted_br_features['br_rate'].rolling(6).mean())
    extracted_br_features_roll_avg = extracted_br_features.loc[:, extracted_br_features.columns != 'times'].rolling(
        6).mean()
    extracted_br_features_roll_avg['times'] = extracted_br_features['times']
    extracted_br_features_roll_avg['br_ok'] = extracted_br_features['br_ok']

    extracted_hr_features = dataextract.raw_windowing_heartrate(10, 1)
    extracted_hr_features = extracted_hr_features.drop(['hr_HRV_lf', 'hr_HRV_hf', 'hr_HRV_lf_hf'], axis=1)
    extracted_hr_features_roll_avg = extracted_hr_features.loc[:, extracted_hr_features.columns != 'times'].rolling(
        10).mean()
    extracted_hr_features_roll_avg['times'] = extracted_hr_features['times']
    extracted_hr_features_roll_avg['hr_ok'] = extracted_hr_features['hr_ok']

    extracted_hr_features2 = dataextract.raw_windowing_heartrate(100, 1)  # longer time to extract HRV frequency feat.
    extracted_hr_features2 = extracted_hr_features2[['hr_HRV_lf', 'hr_HRV_hf', 'hr_HRV_lf_hf', 'times']]
    extracted_hr_features2_roll_avg = extracted_hr_features2.loc[:, extracted_hr_features2.columns != 'times'].rolling(
        10).mean()
    extracted_hr_features2_roll_avg['times'] = extracted_hr_features2['times']

    all_features = extracted_br_features_roll_avg
    all_features = pd.merge(all_features, extracted_hr_features_roll_avg, on='times')
    all_features = pd.merge(all_features, extracted_hr_features2_roll_avg, on='times')

    task_timestamps = dataread.get_data_task_timestamps()
    relax_timestamps = dataread.get_relax_timestamps()

    bandread = bandreader.HeartRateBand(path + '_Hrates/', ident)
    band_data = bandread.load()
    # clip the Band recording to the duration of the wireless recording (epoch times are in ms)
    band_data_time_start = bisect(band_data[0][:], end_epoch_time - data[0][-1] * 1000)
    band_data_time_stop = bisect(band_data[0][:], end_epoch_time)
    band_data = [band_data[0][band_data_time_start:band_data_time_stop],
                 band_data[1][band_data_time_start:band_data_time_stop]]
    band_data_new__data = [(band_data[0] - band_data[0][0]) / 1000, band_data[1]]

    hr_data = extracted_hr_features_roll_avg[['times', 'hr_rate']]
    hr_data['times'] = hr_data['times'].astype(int)

    band_data = pd.DataFrame()
    band_data['times'] = band_data_new__data[0]
    band_data['times'] = band_data['times'].astype(int)
    band_data['band_rate'] = band_data_new__data[1]
    band_data = band_data.drop_duplicates(subset=['times'])

    together_data = pd.merge(hr_data, band_data, on='times')
    together_data = together_data.dropna()

    for i in range(len(all_features['times'])):
        # Band heart-rate sample just after this feature time
        find_in_hr_data = bisect(together_data['times'], all_features['times'][i])
        all_features.ix[i, 'band_rate'] = together_data['band_rate'][find_in_hr_data]

    for i in range(len(cog_res)):
        # feature rows covered by this task window
        all_feat_ind_task_start = bisect(all_features['times'], task_timestamps[i][0])
        all_feat_ind_task_end = bisect(all_features['times'], task_timestamps[i][1])
        for j in cog_res.columns:
            all_features.ix[all_feat_ind_task_start:all_feat_ind_task_end, j] = cog_res.iloc[i][j]
            if cog_res.iloc[i][j] == 'GC' or cog_res.iloc[i][j] == 'PT':
                all_features.ix[all_feat_ind_task_start:all_feat_ind_task_end, 'keyboard_task'] = True
            elif cog_res.iloc[i][j] == 'HP' or cog_res.iloc[i][j] == 'FA' or cog_res.iloc[i][j] == 'NC' or \
                    cog_res.iloc[i][j] == 'SX':
                all_features.ix[all_feat_ind_task_start:all_feat_ind_task_end, 'keyboard_task'] = False
        for k in range(all_feat_ind_task_end - all_feat_ind_task_start + 1):
            all_features.ix[k + all_feat_ind_task_start, 'on_task_or_break_index'] = k
        for k in range(all_feat_ind_task_end - all_feat_ind_task_start, -1, -1):
            all_features.ix[all_feat_ind_task_end - k, 'on_task_or_break_index_down'] = k
        all_features.ix[all_feat_ind_task_start:all_feat_ind_task_end, 'on_task'] = True

    for i in range(len(relax_timestamps)):
        # feature rows covered by this relax window
        all_feat_ind_task_start = bisect(all_features['times'], relax_timestamps[i][0])
        all_feat_ind_task_end = bisect(all_features['times'], relax_timestamps[i][1])
        new_end = all_feat_ind_task_end + 30
        # if i == 0:
        #     continue
        for k in range(all_feat_ind_task_end - all_feat_ind_task_start + 1):
            all_features.ix[k + all_feat_ind_task_start, 'on_task_or_break_index'] = k
            all_features.ix[k + all_feat_ind_task_start, 'consecutive_break'] = i
        for k in range(new_end - all_feat_ind_task_start + 1):
            all_features.ix[k + all_feat_ind_task_start, 'on_break_and_after_index'] = k
            if k <= 15:
                all_features.ix[k + all_feat_ind_task_start, 'engagement_increase'] = False
            elif k <= 30:
                all_features.ix[k + all_feat_ind_task_start, 'engagement_increase'] = np.nan
            else:
                all_features.ix[k + all_feat_ind_task_start, 'engagement_increase'] = True
        for k in range(all_feat_ind_task_end - all_feat_ind_task_start, -1, -1):
            all_features.ix[all_feat_ind_task_end - k, 'on_task_or_break_index_down'] = k
        all_features.ix[all_feat_ind_task_start:all_feat_ind_task_end, 'on_task'] = False

    all_features['person_id'] = cog_res['person_id'][0]
    all_features.to_csv(path_or_buf=path + ident + '/' + ident + '-data.csv', index=False)
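# A compact sketch of the window-labelling pattern used above: bisect locates the
# feature rows covered by a (start, end) time window, and per-row counters are
# written into new columns. Column names and times are illustrative only, and
# .loc is used instead of the older .ix (equivalent here for the default integer
# index).
import numpy as np
import pandas as pd
from bisect import bisect

features = pd.DataFrame({'times': np.arange(0, 10)})    # one feature row per second
window = (2.5, 6.5)                                      # a task lasting from 2.5 s to 6.5 s

start_idx = bisect(list(features['times']), window[0])   # 3
end_idx = bisect(list(features['times']), window[1])     # 7

features.loc[start_idx:end_idx, 'on_task'] = True
for k in range(end_idx - start_idx + 1):
    features.loc[start_idx + k, 'on_task_or_break_index'] = k
print(features)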
def do_double_search(request_form):
    """
    search method called from both welcome() and search()
    :param request_form:
    :return:
    """
    search_term1 = request_form["doubleTermQuery1"].lower()
    search_term2 = request_form["doubleTermQuery2"].lower()
    language_var, country_var = request_form["languageAndRegion"].split(':', 1)

    try:
        specific_query1 = simple_query_totals({"query": "body_text_ws:%s" % search_term1,
                                               "filter": ["country_s:%s" % country_var,
                                                          "langid_s:%s" % language_var]})
    except (KeyError, HTTPError):
        return flask.render_template('no_results.html', query=search_term1,
                                     available_options=AVAILABLE_OPTIONS, search_mode='double')
    try:
        specific_query2 = simple_query_totals({"query": "body_text_ws:%s" % search_term2,
                                               "filter": ["country_s:%s" % country_var,
                                                          "langid_s:%s" % language_var]})
    except (KeyError, HTTPError):
        return flask.render_template('no_results.html', query=search_term2,
                                     available_options=AVAILABLE_OPTIONS, search_mode='double')

    # need to check country again for some reason
    matches = [specific_query1['num_docs'].sum(), specific_query2['num_docs'].sum()]

    #############################
    # GET TOTALS FOR EVERYTHING #
    #############################
    totals = simple_query_totals({"query": "*:*", "filter": ["country_s:%s" % country_var,
                                                             "langid_s:%s" % language_var]})
    gender_totals = totals.groupby('gender').num_docs.sum()
    age_totals = totals.groupby('age').num_docs.sum()
    age_totals = sort_and_filter_age(age_totals)
    age_totals_norm = age_totals / age_totals.sum()

    ##########
    # GENDER #
    ##########
    gender_specific_query1 = pd.DataFrame(data=specific_query1.groupby('gender').num_docs.sum(),
                                          index=['F', 'M']).fillna(0)
    gender_specific_query2 = pd.DataFrame(data=specific_query2.groupby('gender').num_docs.sum(),
                                          index=['F', 'M']).fillna(0)
    abs_percentages1 = gender_specific_query1.num_docs / gender_totals
    abs_percentages2 = gender_specific_query2.num_docs / gender_totals
    try:
        renormalizer1 = 1.0 / abs_percentages1.sum()
    except ZeroDivisionError:
        return flask.render_template('no_results.html', query=search_term1,
                                     available_options=AVAILABLE_OPTIONS, search_mode='double')
    try:
        renormalizer2 = 1.0 / abs_percentages2.sum()
    except ZeroDivisionError:
        return flask.render_template('no_results.html', query=search_term2,
                                     available_options=AVAILABLE_OPTIONS, search_mode='double')
    gender_query_adjusted1 = abs_percentages1 * renormalizer1
    gender_query_adjusted2 = abs_percentages2 * renormalizer2

    gender_comparison = pd.DataFrame(
        data={search_term1: gender_specific_query1.values.reshape(-1),
              search_term2: gender_specific_query2.values.reshape(-1)},
        index=['F', 'M']).T
    gender_comparison_adjusted = pd.DataFrame(
        data={search_term1: gender_query_adjusted1.values,
              search_term2: gender_query_adjusted2.values},
        index=['F', 'M']).T
    del gender_comparison.index.name

    chi2, pvalue, dof, expected = chi2_contingency(gender_comparison)
    # map the p-value onto the ordered significance thresholds in P_LEVELS
    gender_stats_level = bisect(P_LEVELS, pvalue)
    if gender_stats_level == len(P_LEVELS):
        gender_stats_msg = "Gender difference is <em>not</em> statistically significant " \
                           "(Chi-squared contingency test with p > %.4f)" % (P_LEVELS[-1])
    else:
        gender_stats_msg = "Gender difference is statistically significant at p < %s " \
                           "(p = %.4f with Chi-squared contingency test)" % (P_LEVELS[gender_stats_level], pvalue)

    J = pd.DataFrame(gender_comparison_adjusted.unstack())
    L = pd.DataFrame(data={'variable': [J.index.levels[1][x] for x in J.index.labels[1]],
                           'gender': [J.index.levels[0][x] for x in J.index.labels[0]],
                           'count': J.values.T[0].tolist()})
    gender_plot = Bar(L,
                      ylabel="percentage",
                      group='gender',
                      label='variable',
                      values='count',
                      title="Distribution by gender",
                      logo=None,
                      toolbar_location="below",
                      # width=600,
                      # height=400,
                      legend='top_right',
                      color=['blue', 'green'],
                      webgl=False)

    #######
    # AGE #
    #######
    age_specific_query1 = specific_query1.groupby('age').num_docs.sum()
    age_specific_query1 = sort_and_filter_age(age_specific_query1)
    age_specific_query_norm1 = age_specific_query1 / age_specific_query1.sum()
    age_specific_query2 = specific_query2.groupby('age').num_docs.sum()
    age_specific_query2 = sort_and_filter_age(age_specific_query2)
    age_specific_query_norm2 = age_specific_query2 / age_specific_query2.sum()

    compare_age_df = pd.DataFrame({'background distribution': age_totals_norm,
                                   'first term': pd.rolling_mean(age_specific_query_norm1, ROLLING_MEAN_FRAME),
                                   'second term': pd.rolling_mean(age_specific_query_norm2, ROLLING_MEAN_FRAME)})

    r, pvalue = spearmanr(compare_age_df['first term'], compare_age_df['second term'])
    age_stats_level = bisect(P_LEVELS, pvalue)
    if age_stats_level == len(P_LEVELS):
        age_stats_msg = "Age difference is <em>not</em> statistically significant (p > %s)" % (P_LEVELS[-1])
    else:
        age_stats_msg = "Age difference is <em>statistically significant</em> at p < %s (p = %s)" % (
            P_LEVELS[age_stats_level], pvalue)

    compare_age_df['i'] = compare_age_df.index
    age_plot = Line(compare_age_df,
                    x='i',
                    title="Age distribution",
                    ylabel="percentage",
                    xlabel='age',
                    logo=None,
                    toolbar_location="below",
                    legend='top_right',
                    color=['silver', 'blue', 'green'],
                    # width=1000,
                    # height=400,
                    webgl=False)

    ########
    # NUTS #
    ########
    # TODO: what about missing regions?
    nuts_specific_query1 = specific_query1.groupby('nuts_3').num_docs.sum()
    nuts_specific_query2 = specific_query2.groupby('nuts_3').num_docs.sum()
    nuts_query_norm1 = nuts_specific_query1 / nuts_specific_query1.sum()
    nuts_query_norm2 = nuts_specific_query2 / nuts_specific_query2.sum()

    regions = list(sorted(set(nuts_specific_query1.index).union(set(nuts_specific_query2.index))))
    nutsdiff = pd.DataFrame(0, index=regions, columns=arange(1))
    nutsdiff[0] = nuts_query_norm1 - nuts_query_norm2
    nutsdiff['G2'] = abs(nutsdiff[0]) > nutsdiff[0].abs().mean()
    outliers = sorted([x for x in regions if nutsdiff['G2'].ix[x].any() == True])
    is_it_term2 = nutsdiff[0].ix[outliers] < 0
    outliers1 = ', '.join(
        sorted(['%s (%s)' % (NUTS_NAMES[x], x) for x in is_it_term2.index if is_it_term2[x] == False]))
    outliers2 = ', '.join(
        sorted(['%s (%s)' % (NUTS_NAMES[x], x) for x in is_it_term2.index if is_it_term2[x] == True]))

    outlier_description = []
    if outliers1:
        outlier_description.append(
            '<em>%s</em> is more prevalent than <em>%s</em> in regions %s' % (search_term1, search_term2, outliers1))
    if outliers2:
        if outlier_description:
            outlier_description.append(', while <br />')
        outlier_description.append(
            '<em>%s</em> is more prevalent than <em>%s</em> in regions %s' % (search_term2, search_term1, outliers2))
    outlier_description = ''.join(outlier_description)

    bokeh_script, (gender_plot_div, age_plot_div) = components((gender_plot, age_plot))

    return flask.render_template('comparison_term_results.html',
                                 query1=search_term1,
                                 query2=search_term2,
                                 matches=matches,
                                 gender_comparison=gender_comparison.to_html(justify='right'),
                                 gender_stats_msg=gender_stats_msg,
                                 bokeh_script=bokeh_script,
                                 gender_plot=gender_plot_div,
                                 age_plot=age_plot_div,
                                 country_code=country_var,
                                 outlier_description=outlier_description,
                                 gender_total1=gender_specific_query1.sum().num_docs,
                                 gender_total2=gender_specific_query2.sum().num_docs,
                                 age_total1=age_specific_query1.sum(),
                                 age_total2=age_specific_query2.sum(),
                                 # age_total_M=age_specific_male_totals,
                                 # age_total_F=age_specific_female_totals,
                                 nuts_total1=nuts_specific_query1.sum(),
                                 nuts_total2=nuts_specific_query2.sum(),
                                 available_options=AVAILABLE_OPTIONS)
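# A small sketch of the p-value bucketing used twice above: bisect maps a p-value
# onto an ordered list of significance thresholds, and an index equal to
# len(P_LEVELS) means "not significant at any listed level". The threshold values
# here are assumed for the illustration; the real P_LEVELS is defined elsewhere in
# the application.
from bisect import bisect

P_LEVELS = [0.001, 0.01, 0.05]

for pvalue in (0.0004, 0.03, 0.2):
    level = bisect(P_LEVELS, pvalue)
    if level == len(P_LEVELS):
        print("p = %.4f: not significant (p > %s)" % (pvalue, P_LEVELS[-1]))
    else:
        print("p = %.4f: significant at p < %s" % (pvalue, P_LEVELS[level]))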