Example #1
    def get_relax_timestamps(self, return_indexes=False):
        """Return the start and end time of each relax time frame.

        :param return_indexes: True - return indexes into the time array; False - return actual times.
        :return: list of tuples, each holding the start and end of a relax time frame.
        """
        end_time = self.get_end_time_cognitive_load_study()
        relax_timestamps = self.get_relax_timestamps_from_file()
        data = [self.time, self.phase]

        timestamps = []
        for i in range(0, len(relax_timestamps) - 1, 2):
            relax_start = relax_timestamps[i]
            relax_stop = relax_timestamps[i + 1]

            difference1 = (end_time - relax_start) / 1000
            difference2 = (end_time - relax_stop) / 1000

            start_on_data = data[0][-1] - difference1
            stop_on_data = data[0][-1] - difference2

            if return_indexes:
                start_on_data = bisect(data[0], start_on_data)
                stop_on_data = bisect(data[0], stop_on_data)

            timestamps.append((start_on_data, stop_on_data))

        return timestamps
Example #2
    def get_data_task_timestamps(self, return_indexes=False):
        """Return the start and end time of each task time frame.

        :param return_indexes: True - return indexes into the time array; False - return actual times.
        :return: list of tuples, each holding the start and end of a task time frame.
        """
        data = [self.time, self.phase]
        end_time = self.get_end_time_cognitive_load_study()
        cognitive_study_results = self.read_cognitive_load_study(
            self.ident + '-primary-extract.txt')

        timestamps = []
        for i in range(len(cognitive_study_results)):
            task_1_start = cognitive_study_results['start_time'][i]
            task_1_length = cognitive_study_results['time_on_task'][i]

            difference = (end_time - task_1_start) / 1000

            start_on_data = data[0][-1] - difference
            end_on_data = start_on_data + task_1_length / 1000

            if return_indexes:
                start_on_data = bisect(data[0], start_on_data)
                end_on_data = bisect(data[0], end_on_data)

            timestamps.append((start_on_data, end_on_data))

        return timestamps
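Both methods rely on the same conversion: subtract the epoch-timestamp offsets to land on the recording's own time axis, then let bisect locate that time in the sorted self.time array. A minimal, self-contained sketch of the lookup (array values are illustrative):

from bisect import bisect

time_axis = [0.0, 0.5, 1.0, 1.5, 2.0, 2.5]  # sorted sample times in seconds
target = 1.2

index = bisect(time_axis, target)  # insertion point just after `target`
print(index)                 # 3
print(time_axis[index - 1])  # 1.0, the last sample at or before `target`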
Example #3
def monochromatic(colours):
    """Return True if all given HSL colours fall in the same hue bucket.

    Low-saturation colours are grouped together as grey; near-black and
    near-white colours are ignored.
    """
    hue_vals = [-10, -5, -2, 15, 45, 65, 165, 180, 265, 300, 340, 360]
    colour_vals = '0GROYGCBPVR'  # bucket labels (not used below)

    curr_colour = None

    for colour in colours:
        H, S, L = colour
        H = H * 360
        S = S * 100
        L = L * 100

        if S <= 10:
            H = -3

        if L <= 10:
            continue

        if L > 97:
            continue

        if curr_colour is None:
            curr_colour = bisect(hue_vals, H)

        elif curr_colour != bisect(hue_vals, H):
            return False

    return True
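Illustrative usage (monochromatic needs `from bisect import bisect`; HSL components are fractions in [0, 1], values below are hypothetical):

green1 = (0.15, 0.8, 0.5)   # H=54, a green
green2 = (0.16, 0.6, 0.4)   # H=57.6, same hue bucket
blue = (0.60, 0.7, 0.5)     # H=216, a different bucket

print(monochromatic([green1, green2]))            # True
print(monochromatic([green1, green2, blue]))      # False
print(monochromatic([green1, (0.6, 0.7, 0.99)]))  # True: near-white is skipped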
Example #4
    def __init__(self, filename, min_len=3):
        # assumes ALPHABET is the uppercase alphabet (e.g. string.ascii_uppercase)
        lines = open(filename).read().upper().split()
        self.words = [word for word in lines if len(word) >= min_len]
        self.words.sort()
        self.bounds = {}
        for c in ALPHABET:
            c2 = chr(ord(c) + 1)
            # words starting with c sort between the bare strings c and c2
            self.bounds[c] = (bisect.bisect(self.words, c),
                              bisect.bisect(self.words, c2))
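The bounds dict thus maps each initial letter to the slice of self.words that starts with it. The same idea standalone:

import bisect

words = sorted(['APE', 'CAT', 'COW', 'DOG'])
lo = bisect.bisect(words, 'C')  # 1: first word starting with 'C'
hi = bisect.bisect(words, 'D')  # 3: one past the last such word
print(words[lo:hi])             # ['CAT', 'COW']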
Example #5
def organize(sizs,
             breakpoints=[1000, 2000, 3000, 4000, 5000, 6000,
                          7000, 8000, 9000, 10000, 11000],
             specs='ABCDEFGHIJKL'):
    i = bisect(breakpoints, sizs)
    return specs[i]
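organize grades a size into one of twelve buckets: bisect returns how many breakpoints the size exceeds, which indexes straight into specs (requires `from bisect import bisect`):

print(organize(500))    # 'A' -- below every breakpoint
print(organize(2500))   # 'C' -- past 1000 and 2000
print(organize(12000))  # 'L' -- past all eleven breakpoints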
Example #6
    def numSmallerByFrequency(self, queries, words):
        # requires: from collections import Counter; from bisect import bisect
        freq, ans = [], []
        for word in words:
            c = Counter(word)
            freq.append(c[min(c.keys())])  # f(word): count of the smallest letter
        freq.sort()
        n = len(freq)
        for query in queries:
            c = Counter(query)
            ans.append(n - bisect(freq, c[min(c.keys())]))
        return ans
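Here f(word) is the frequency of the word's lexicographically smallest character; sorting the word scores lets bisect answer each query in O(log n). A quick check, assuming the method sits on a Solution class:

sol = Solution()
print(sol.numSmallerByFrequency(["cbd"], ["zaaaz"]))  # [1]: f("cbd")=1 < f("zaaaz")=3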
Example #7
    def kEmptySlots(self, flowers, k):
        from bisect import bisect
        active = []
        for day, flower in enumerate(flowers, 1):
            # index where flower would be inserted to keep `active` sorted
            i = bisect(active, flower)
            # check the previous and the next blooming position for a gap of exactly k
            for neighbor in active[i - (i > 0):i + 1]:
                if abs(neighbor - flower) - 1 == k:
                    return day
            active.insert(i, flower)
        return -1
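A short run-through, again assuming a Solution wrapper: with flowers [1, 3, 2] and k=1, flower 3 blooms on day 2 with exactly one empty slot between it and flower 1.

sol = Solution()
print(sol.kEmptySlots([1, 3, 2], 1))  # 2
print(sol.kEmptySlots([1, 2, 3], 1))  # -1: blooms are always adjacent, never one apart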
Example #8
from bisect import bisect

# primes3 is assumed to return a sorted list of primes below its argument
prm = primes3(10**6)
print(prm[-1])


def isCube(x):
    """Integer test for whether x is a perfect cube."""
    t = int(x ** (1.0 / 3))
    return t ** 3 == x or (t + 1) ** 3 == x


count = 0
idx = bisect(prm, 100)  # index of the first prime above 100 (unused below)
lastprime = 0
for n in range(1, 1000):
    n = n ** 3
    N3 = n ** 3
    N2 = n ** 2

    pind = lastprime
    p = prm[pind]
    while p < prm[-1]:
        p = prm[pind]
        X3 = N2 * (n + p)
        if isCube(X3):
            count += 1
            print(p, n, X3 ** 0.33334, '\t', count)
        pind += 1  # advance to the next prime (the original never did, looping forever)
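primes3 itself is not shown; the snippet only assumes it returns a sorted list of primes below its argument. A minimal sieve with that contract:

def primes3(limit):
    """Return all primes below `limit`, sorted (sieve of Eratosthenes)."""
    sieve = [True] * limit
    sieve[0:2] = [False, False]
    for i in range(2, int(limit ** 0.5) + 1):
        if sieve[i]:
            sieve[i * i::i] = [False] * len(sieve[i * i::i])
    return [i for i, is_prime in enumerate(sieve) if is_prime]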
Example #9
def compare_extracted_hr_and_band(path, ident):
    """Compare heart rates acquired wirelessly and with the Microsoft Band.

    :param path: (str) main path to data, where user data is located in specific folders
    :param ident: (str) user identifier
    :return: MAE, MSE, correlation values of the aligned HR time series
    """

    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files
    data = dataread.unwrap_grc_data()  # unwrap phase. returns time and y values

    samp_rate = round(len(data[1]) / max(data[0]))

    dataextract = dataextractor.DataExtractor(data[0], data[1], samp_rate)

    cog_res = dataread.read_cognitive_load_study(ident + '-primary-extract.txt')
    end_epoch_time = dataread.get_end_time_cognitive_load_study()  # end time of the study (epoch ms)

    extracted_br_features = dataextract.raw_windowing_breathing(30, 1)
    extracted_br_features['br_rate'] = np.array(extracted_br_features['br_rate'].rolling(6).mean())
    extracted_br_features_roll_avg = extracted_br_features.loc[:, extracted_br_features.columns != 'times'].rolling(
        6).mean()
    extracted_br_features_roll_avg['times'] = extracted_br_features['times']
    extracted_br_features_roll_avg['br_ok'] = extracted_br_features['br_ok']

    extracted_hr_features = dataextract.raw_windowing_heartrate(10, 1)
    extracted_hr_features = extracted_hr_features.drop(['hr_HRV_lf', 'hr_HRV_hf', 'hr_HRV_lf_hf'], axis=1)
    extracted_hr_features_roll_avg = extracted_hr_features.loc[:, extracted_hr_features.columns != 'times'].rolling(
        10).mean()
    extracted_hr_features_roll_avg['times'] = extracted_hr_features['times']
    extracted_hr_features_roll_avg['hr_ok1'] = extracted_hr_features['hr_ok']

    bandread = bandreader.HeartRateBand(path + '_Hrates/', ident)
    band_data = bandread.load()
    band_data_time_start = bisect(band_data[0][:], end_epoch_time - data[0][-1] * 1000)
    band_data_time_stop = bisect(band_data[0][:], end_epoch_time)
    band_data = [band_data[0][band_data_time_start:band_data_time_stop],
                 band_data[1][band_data_time_start:band_data_time_stop]]
    band_data_new_data = [(band_data[0] - band_data[0][0]) / 1000, band_data[1]]

    plt.figure(1)
    plt.clf()
    plt.plot(extracted_hr_features_roll_avg['times'], extracted_hr_features_roll_avg['hr_rate'], color='orange',
             label='Wi-Mind heart rate')

    plt.plot(band_data_new_data[0], band_data_new_data[1], color='green', label='Microsoft Band heart rate')
    plt.xlabel('time (s)')
    plt.ylabel('heart rate')
    plt.legend()
    plt.show()

    hr_data = extracted_hr_features_roll_avg[['times', 'hr_rate']].copy()  # copy to avoid SettingWithCopyWarning
    hr_data['times'] = hr_data['times'].astype(int)
    band_data = pd.DataFrame()
    band_data['times'] = band_data_new_data[0]
    band_data['times'] = band_data['times'].astype(int)
    band_data['rate'] = band_data_new_data[1]
    band_data = band_data.drop_duplicates(subset=['times'])

    together_data = pd.merge(hr_data, band_data, on='times')
    together_data = together_data.dropna()

    mae = metrics.mean_absolute_error(together_data['rate'], together_data['hr_rate'])
    mse = metrics.mean_squared_error(together_data['rate'], together_data['hr_rate'])
    corr = stats.pearsonr(together_data['rate'], together_data['hr_rate'])

    return mae, mse, corr
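Hypothetical invocation (path and identifier are placeholders for the expected data layout):

mae, mse, corr = compare_extracted_hr_and_band('data/', 'user01')
print('MAE %.2f, MSE %.2f, r %.2f (p=%.3f)' % (mae, mse, corr[0], corr[1]))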
Example #10
def full_signal_extract(path, ident):
    """Extract breathing and heartbeat features from one user and save features to file.

    :param path: (str) main path to data, where user data is located in specific folders
    :param ident: (str) user identifier
    :return: None. Saves the features dataframe to a .csv file.
    """

    dataread = datareader.DataReader(path, ident)  # initialize path to data
    data = dataread.read_grc_data()  # read from files

    data = dataread.unwrap_grc_data()  # unwrap phase. returns time and y values

    samp_rate = round(len(data[1]) / max(data[0]))

    dataextract = dataextractor.DataExtractor(data[0], data[1], samp_rate)

    cog_res = dataread.read_cognitive_load_study(ident + '-primary-extract.txt')
    end_epoch_time = dataread.get_end_time_cognitive_load_study()  # end time of the study (epoch ms)

    extracted_br_features = dataextract.raw_windowing_breathing(30, 1)
    extracted_br_features['br_rate'] = np.array(extracted_br_features['br_rate'].rolling(6).mean())
    extracted_br_features_roll_avg = extracted_br_features.loc[:, extracted_br_features.columns != 'times'].rolling(
        6).mean()
    extracted_br_features_roll_avg['times'] = extracted_br_features['times']
    extracted_br_features_roll_avg['br_ok'] = extracted_br_features['br_ok']

    extracted_hr_features = dataextract.raw_windowing_heartrate(10, 1)
    extracted_hr_features = extracted_hr_features.drop(['hr_HRV_lf', 'hr_HRV_hf', 'hr_HRV_lf_hf'], axis=1)
    extracted_hr_features_roll_avg = extracted_hr_features.loc[:, extracted_hr_features.columns != 'times'].rolling(
        10).mean()
    extracted_hr_features_roll_avg['times'] = extracted_hr_features['times']
    extracted_hr_features_roll_avg['hr_ok'] = extracted_hr_features['hr_ok']
    extracted_hr_features2 = dataextract.raw_windowing_heartrate(100, 1)  # longer time to extract HRV frequency feat.
    extracted_hr_features2 = extracted_hr_features2[['hr_HRV_lf', 'hr_HRV_hf', 'hr_HRV_lf_hf', 'times']]
    extracted_hr_features2_roll_avg = extracted_hr_features2.loc[:, extracted_hr_features2.columns != 'times'].rolling(
        10).mean()
    extracted_hr_features2_roll_avg['times'] = extracted_hr_features2['times']

    all_features = extracted_br_features_roll_avg
    all_features = pd.merge(all_features, extracted_hr_features_roll_avg, on='times')
    all_features = pd.merge(all_features, extracted_hr_features2_roll_avg, on='times')

    task_timestamps = dataread.get_data_task_timestamps()
    relax_timestamps = dataread.get_relax_timestamps()

    bandread = bandreader.HeartRateBand(path + '_Hrates/', ident)
    band_data = bandread.load()
    band_data_time_start = bisect(band_data[0][:], end_epoch_time - data[0][-1] * 1000)
    band_data_time_stop = bisect(band_data[0][:], end_epoch_time)
    band_data = [band_data[0][band_data_time_start:band_data_time_stop],
                 band_data[1][band_data_time_start:band_data_time_stop]]
    band_data_new__data = [(band_data[0] - band_data[0][0]) / 1000, band_data[1]]

    hr_data = extracted_hr_features_roll_avg[['times', 'hr_rate']].copy()  # copy to avoid SettingWithCopyWarning
    hr_data['times'] = hr_data['times'].astype(int)
    band_data = pd.DataFrame()
    band_data['times'] = band_data_new__data[0]
    band_data['times'] = band_data['times'].astype(int)
    band_data['band_rate'] = band_data_new__data[1]
    band_data = band_data.drop_duplicates(subset=['times'])
    together_data = pd.merge(hr_data, band_data, on='times')
    together_data = together_data.dropna().reset_index(drop=True)  # reset index so bisect positions stay valid

    # pandas removed DataFrame.ix; .loc is equivalent here because all_features
    # uses a default integer index
    for i in range(len(all_features['times'])):
        find_in_hr_data = bisect(together_data['times'], all_features['times'][i])
        all_features.loc[i, 'band_rate'] = together_data['band_rate'][find_in_hr_data]

    for i in range(len(cog_res)):
        all_feat_ind_task_start = bisect(all_features['times'], task_timestamps[i][0])
        all_feat_ind_task_end = bisect(all_features['times'], task_timestamps[i][1])
        for j in cog_res.columns:
            all_features.loc[all_feat_ind_task_start:all_feat_ind_task_end, j] = cog_res.iloc[i][j]
            if cog_res.iloc[i][j] in ('GC', 'PT'):
                all_features.loc[all_feat_ind_task_start:all_feat_ind_task_end, 'keyboard_task'] = True
            elif cog_res.iloc[i][j] in ('HP', 'FA', 'NC', 'SX'):
                all_features.loc[all_feat_ind_task_start:all_feat_ind_task_end, 'keyboard_task'] = False
        for k in range(all_feat_ind_task_end - all_feat_ind_task_start + 1):
            all_features.loc[k + all_feat_ind_task_start, 'on_task_or_break_index'] = k
        for k in range(all_feat_ind_task_end - all_feat_ind_task_start, -1, -1):
            all_features.loc[all_feat_ind_task_end - k, 'on_task_or_break_index_down'] = k
        all_features.loc[all_feat_ind_task_start:all_feat_ind_task_end, 'on_task'] = True

    for i in range(len(relax_timestamps)):
        all_feat_ind_task_start = bisect(all_features['times'], relax_timestamps[i][0])
        all_feat_ind_task_end = bisect(all_features['times'], relax_timestamps[i][1])
        new_end = all_feat_ind_task_end + 30
        for k in range(all_feat_ind_task_end - all_feat_ind_task_start + 1):
            all_features.loc[k + all_feat_ind_task_start, 'on_task_or_break_index'] = k
            all_features.loc[k + all_feat_ind_task_start, 'consecutive_break'] = i
        for k in range(new_end - all_feat_ind_task_start + 1):
            all_features.loc[k + all_feat_ind_task_start, 'on_break_and_after_index'] = k
            if k <= 15:
                all_features.loc[k + all_feat_ind_task_start, 'engagement_increase'] = False
            elif k <= 30:
                all_features.loc[k + all_feat_ind_task_start, 'engagement_increase'] = np.nan
            else:
                all_features.loc[k + all_feat_ind_task_start, 'engagement_increase'] = True
        for k in range(all_feat_ind_task_end - all_feat_ind_task_start, -1, -1):
            all_features.loc[all_feat_ind_task_end - k, 'on_task_or_break_index_down'] = k
        all_features.loc[all_feat_ind_task_start:all_feat_ind_task_end, 'on_task'] = False

    all_features['person_id'] = cog_res['person_id'][0]
    all_features.to_csv(path_or_buf=path + ident + '/' + ident + '-data.csv', index=False)
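A matching batch sketch, with placeholder identifiers:

for ident in ['user01', 'user02']:  # hypothetical identifiers
    full_signal_extract('data/', ident)  # writes data/<ident>/<ident>-data.csv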
Example #11
def do_double_search(request_form):
    """
    Search method called from both welcome() and search().

    :param request_form: submitted form with both query terms and the language/region selection
    :return: a rendered comparison template, or a no-results page
    """
    search_term1 = request_form["doubleTermQuery1"].lower()
    search_term2 = request_form["doubleTermQuery2"].lower()
    language_var, country_var = request_form["languageAndRegion"].split(':', 1)

    try:
        specific_query1 = simple_query_totals({"query": "body_text_ws:%s" % search_term1,
                                               "filter": ["country_s:%s" % country_var, "langid_s:%s" % language_var]})
    except (KeyError, HTTPError):
        return flask.render_template('no_results.html', query=search_term1, available_options=AVAILABLE_OPTIONS,
                                     search_mode='double')

    try:
        specific_query2 = simple_query_totals({"query": "body_text_ws:%s" % search_term2,
                                               "filter": ["country_s:%s" % country_var, "langid_s:%s" % language_var]})
    except (KeyError, HTTPError):
        return flask.render_template('no_results.html', query=search_term2, available_options=AVAILABLE_OPTIONS,
                                     search_mode='double')

    # need to check country again for some reason
    matches = [specific_query1['num_docs'].sum(), specific_query2['num_docs'].sum()]

    #############################
    # GET TOTALS FOR EVERYTHING #
    #############################
    totals = simple_query_totals({"query": "*:*",
                                  "filter": ["country_s:%s" % country_var, "langid_s:%s" % language_var]})

    gender_totals = totals.groupby('gender').num_docs.sum()

    age_totals = totals.groupby('age').num_docs.sum()
    age_totals = sort_and_filter_age(age_totals)
    age_totals_norm = age_totals / age_totals.sum()

    ###########
    #  GENDER #
    ###########
    gender_specific_query1 = pd.DataFrame(data=specific_query1.groupby('gender').num_docs.sum(),
                                          index=['F', 'M']).fillna(0)
    gender_specific_query2 = pd.DataFrame(data=specific_query2.groupby('gender').num_docs.sum(),
                                          index=['F', 'M']).fillna(0)
    abs_percentages1 = gender_specific_query1.num_docs / gender_totals
    abs_percentages2 = gender_specific_query2.num_docs / gender_totals
    # numpy/pandas floats divide by zero silently (yielding inf) rather than
    # raising ZeroDivisionError, so check the sums explicitly
    if abs_percentages1.sum() == 0:
        return flask.render_template('no_results.html', query=search_term1, available_options=AVAILABLE_OPTIONS,
                                     search_mode='double')
    renormalizer1 = 1.0 / abs_percentages1.sum()
    if abs_percentages2.sum() == 0:
        return flask.render_template('no_results.html', query=search_term2, available_options=AVAILABLE_OPTIONS,
                                     search_mode='double')
    renormalizer2 = 1.0 / abs_percentages2.sum()

    gender_query_adjusted1 = abs_percentages1 * renormalizer1
    gender_query_adjusted2 = abs_percentages2 * renormalizer2

    gender_comparison = pd.DataFrame(
        data={search_term1: gender_specific_query1.values.reshape(-1), search_term2: gender_specific_query2.values.reshape(-1)},
        index=['F', 'M']).T

    gender_comparison_adjusted = pd.DataFrame(
        data={search_term1: gender_query_adjusted1.values, search_term2: gender_query_adjusted2.values},
        index=['F', 'M']).T

    gender_comparison.index.name = None  # `del index.name` is not supported by modern pandas
    chi2, pvalue, dof, expected = chi2_contingency(gender_comparison)
    gender_stats_level = bisect(P_LEVELS, pvalue)

    if gender_stats_level == len(P_LEVELS):
        gender_stats_msg = "Gender difference is <em>not</em> statistically significant (Chi-squared contingency test with p > %.4f)" % (
            P_LEVELS[-1])
    else:
        gender_stats_msg = "Gender difference is statistically significant at p < %s (p = %.4f with Chi-squared contingency test)" % (
            P_LEVELS[gender_stats_level], pvalue)

    J = pd.DataFrame(gender_comparison_adjusted.unstack())
    # MultiIndex.labels was renamed to MultiIndex.codes in pandas 0.24
    L = pd.DataFrame(data={'variable': [J.index.levels[1][x] for x in J.index.codes[1]],
                           'gender': [J.index.levels[0][x] for x in J.index.codes[0]],
                           'count': J.values.T[0].tolist()})

    gender_plot = Bar(L,
                      ylabel="percentage",
                      group='gender',
                      label='variable',
                      values='count',
                      title="Distribution by gender",
                      logo=None,
                      toolbar_location="below",
                      # width=600,
                      # height=400,
                      legend='top_right',
                      color=['blue', 'green'],
                      webgl=False)

    #######
    # AGE #
    #######
    age_specific_query1 = specific_query1.groupby('age').num_docs.sum()
    age_specific_query1 = sort_and_filter_age(age_specific_query1)
    age_specific_query_norm1 = age_specific_query1 / age_specific_query1.sum()
    age_specific_query2 = specific_query2.groupby('age').num_docs.sum()
    age_specific_query2 = sort_and_filter_age(age_specific_query2)
    age_specific_query_norm2 = age_specific_query2 / age_specific_query2.sum()

    compare_age_df = pd.DataFrame({'background distribution': age_totals_norm,
                                   'first term': age_specific_query_norm1.rolling(ROLLING_MEAN_FRAME).mean(),
                                   'second term': age_specific_query_norm2.rolling(ROLLING_MEAN_FRAME).mean()
                                   })

    r, pvalue = spearmanr(compare_age_df['first term'], compare_age_df['second term'])
    age_stats_level = bisect(P_LEVELS, pvalue)

    if age_stats_level == len(P_LEVELS):
        age_stats_msg = "Age difference is <em>not</em> statistically significant (p > %s)" % (P_LEVELS[-1])
    else:
        age_stats_msg = "Age difference is <em>statistically significant</em> at p < %s (p = %s)" % (
            P_LEVELS[age_stats_level], pvalue)

    compare_age_df['i'] = compare_age_df.index
    age_plot = Line(compare_age_df,
                    x='i',
                    title="Age distribution",
                    ylabel="percentage",
                    xlabel='age',
                    logo=None,
                    toolbar_location="below",
                    legend='top_right',
                    color=['silver', 'blue', 'green'],
                    # width=1000,
                    # height=400,
                    webgl=False)

    ########
    # NUTS #
    ########
    # TODO: what about missing regions?
    nuts_specific_query1 = specific_query1.groupby('nuts_3').num_docs.sum()
    nuts_specific_query2 = specific_query2.groupby('nuts_3').num_docs.sum()
    nuts_query_norm1 = nuts_specific_query1 / nuts_specific_query1.sum()
    nuts_query_norm2 = nuts_specific_query2 / nuts_specific_query2.sum()

    regions = list(sorted(set(nuts_specific_query1.index).union(set(nuts_specific_query2.index))))
    nutsdiff = pd.DataFrame(0, index=regions, columns=[0])
    nutsdiff[0] = nuts_query_norm1 - nuts_query_norm2
    nutsdiff['G2'] = abs(nutsdiff[0]) > nutsdiff[0].abs().mean()

    outliers = sorted(x for x in regions if nutsdiff['G2'].loc[x])
    is_it_term2 = nutsdiff[0].loc[outliers] < 0
    outliers1 = ', '.join(
        sorted('%s (%s)' % (NUTS_NAMES[x], x) for x in is_it_term2.index if not is_it_term2[x]))
    outliers2 = ', '.join(sorted('%s (%s)' % (NUTS_NAMES[x], x) for x in is_it_term2.index if is_it_term2[x]))

    outlier_description = []
    if outliers1:
        outlier_description.append(
            '<em>%s</em> is more prevalent than <em>%s</em> in regions %s' % (search_term1, search_term2, outliers1))
    if outliers2:
        if outlier_description:
            outlier_description.append(', while <br />')
        outlier_description.append(
            '<em>%s</em> is more prevalent than <em>%s</em> in regions %s' % (search_term2, search_term1, outliers2))
    outlier_description = ''.join(outlier_description)

    bokeh_script, (gender_plot_div, age_plot_div) = components((gender_plot, age_plot))

    return flask.render_template('comparison_term_results.html',
                                 query1=search_term1,
                                 query2=search_term2,
                                 matches=matches,
                                 gender_comparison=gender_comparison.to_html(justify='right'),
                                 gender_stats_msg=gender_stats_msg,
                                 bokeh_script=bokeh_script,
                                 gender_plot=gender_plot_div,
                                 age_plot=age_plot_div,
                                 country_code=country_var,
                                 outlier_description=outlier_description,
                                 gender_total1=gender_specific_query1.sum().num_docs,
                                 gender_total2=gender_specific_query2.sum().num_docs,
                                 age_total1=age_specific_query1.sum(),
                                 age_total2=age_specific_query2.sum(),
                                 # age_total_M=age_specific_male_totals,
                                 # age_total_F=age_specific_female_totals,
                                 nuts_total1=nuts_specific_query1.sum(),
                                 nuts_total2=nuts_specific_query2.sum(),
                                 available_options=AVAILABLE_OPTIONS
                                 )
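Both bisect(P_LEVELS, pvalue) calls above map a p-value to a significance bucket: the result is the number of thresholds the p-value exceeds, and if it exceeds them all it equals len(P_LEVELS) and the difference is reported as not significant. A standalone sketch, assuming conventional ascending thresholds (the real P_LEVELS constant is defined elsewhere in the app):

from bisect import bisect

P_LEVELS = [0.001, 0.01, 0.05]  # assumed values

print(bisect(P_LEVELS, 0.0004))  # 0 -> significant at p < 0.001
print(bisect(P_LEVELS, 0.03))    # 2 -> significant at p < 0.05
print(bisect(P_LEVELS, 0.2))     # 3 == len(P_LEVELS) -> not significant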