def tmp():
    """Relate face age to each Microsoft emotion score for the 'Fight' set.

    Loads the Microsoft emotion and face-attribute frames for the 'Normal' and
    'Fight' sets, aligns each pair with
    ``Filtering.ms_emotions_attributes_alignment``, cuts each pair to a common
    length, and calls ``emotions_pair_dependency`` once per 'Fight' emotion
    column against the 'Fight' ages.

    The 'Normal' arrays are built too, but nothing in this function uses them.
    """
    # 'contempt' and 'neutral' were already excluded in the original analysis.
    emotion_columns = ('anger', 'disgust', 'fear', 'happiness', 'sadness', 'surprise')

    def trim_to_common_length(emo_df, attributes_df):
        # Keep the leading rows of both frames up to the shorter length.
        common_len = min(len(emo_df), len(attributes_df))
        return emo_df[0:common_len], attributes_df[0:common_len]

    def emotion_arrays(emo_df):
        # ``.values`` replaces ``Series.get_values()``, removed in pandas 1.0.
        return [emo_df[column].values for column in emotion_columns]

    normal_emo_df = LoadingData.read_microsoft_emotions_to_dataframe(
        'Normal', include_non_emotions_column=True)
    fight_emo_df = LoadingData.read_microsoft_emotions_to_dataframe(
        'Fight', include_non_emotions_column=True)
    ms_attributes_normal_df = LoadingData.read_ms_face_attributes_to_dataframe('Normal')
    ms_attributes_fight_df = LoadingData.read_ms_face_attributes_to_dataframe('Fight')

    normal_emo_df, ms_attributes_normal_df = Filtering.ms_emotions_attributes_alignment(
        normal_emo_df, ms_attributes_normal_df)
    fight_emo_df, ms_attributes_fight_df = Filtering.ms_emotions_attributes_alignment(
        fight_emo_df, ms_attributes_fight_df)

    fight_emo_df, ms_attributes_fight_df = trim_to_common_length(
        fight_emo_df, ms_attributes_fight_df)
    fight_ages = ms_attributes_fight_df['age'].values

    normal_emo_df, ms_attributes_normal_df = trim_to_common_length(
        normal_emo_df, ms_attributes_normal_df)
    # NOTE(review): the 'Normal' values below are computed but never used.
    normal_ages = ms_attributes_normal_df['age'].values
    normal_emos = emotion_arrays(normal_emo_df)

    fight_emos = emotion_arrays(fight_emo_df)
    for emotion1 in fight_emos:
        # NOTE(review): both arrays are locals, so a lookup in globals() may
        # not resolve their names -- confirm what var_name returns here.
        emotions_pair_dependency(fight_ages, emotion1,
                                 var_name(fight_ages, globals()),
                                 var_name(emotion1, globals()))
def sightcorp_significant_analysis(sig_value):
    """Print the share of Sightcorp rows that pass each emotion's significance filter.

    For the 'Fight' and 'Normal' frames this prints the row counts and then,
    for anger, disgust, fear and sadness, the ratio
    ``len(filtered frame) / len(full frame)`` where the filtered frame comes
    from ``Filtering.filtering_by_significant_<emotion>_emo_value``.

    :param sig_value: threshold forwarded unchanged to each Filtering function.
    :raises ZeroDivisionError: if either loaded frame is empty.
    """
    fight_emo_df = LoadingData.read_sightcorp_emotions_to_dataframe('Fight', include_post_id=True)
    normal_emo_df = LoadingData.read_sightcorp_emotions_to_dataframe('Normal', include_post_id=True)
    separator = '----------------------------------'

    # Single-argument print() calls give the same text on Python 2 and 3.
    print('len(fight_df)= {}'.format(len(fight_emo_df)))
    print('len(normal_df)= {}'.format(len(normal_emo_df)))
    print(separator)

    for emotion in ('anger', 'disgust', 'fear', 'sadness'):
        significant_filter = getattr(
            Filtering, 'filtering_by_significant_{}_emo_value'.format(emotion))
        sig_fight_df = significant_filter(fight_emo_df, sig_value)
        sig_normal_df = significant_filter(normal_emo_df, sig_value)
        print('len(fight_df)[{} sig.val={}] = {}'.format(
            emotion, sig_value, float(len(sig_fight_df)) / len(fight_emo_df)))
        print('len(normal_df)[{} sig.val={}] = {}'.format(
            emotion, sig_value, float(len(sig_normal_df)) / len(normal_emo_df)))
        print(separator)
def plot_feature_vs_date(media_data, media_names, filename, colors):
    """Save one two-panel figure per numeric feature column.

    Left panel: the feature's monthly series for every frame in ``media_data``
    (grouped by ``Filtering.group_by_month``). Right panel: the per-frame mean
    with its standard error, drawn by
    ``create_errorbar_graph_of_mean_feature_values``.

    :param media_data: sequence of DataFrames; columns are taken from the first.
    :param media_names: labels passed on to the labelling/plotting helpers.
    :param filename: format string; ``filename.format(feature)`` is the save path.
    :param colors: one colour per frame, paired with ``media_data`` in order.
    """
    plt.style.use('fivethirtyeight')
    first_frame = media_data[0]
    for feature in first_frame.columns:
        # Only columns whose first value is an int/float are plotted.
        # NOTE(review): on Python 3 numpy integer scalars are not ``int``
        # instances, so integer columns may be skipped -- confirm intent.
        if not isinstance(first_frame.iloc[0][feature], (int, float)):
            continue

        means = []
        errs = []
        plt.figure(figsize=(10, 4))
        # Builtin zip replaces the Python-2-only itertools.izip.
        for newssite_data, color in zip(media_data, colors):
            plt.subplot(1, 2, 1)
            data_grouped_by_month = Filtering.group_by_month(newssite_data)
            plt.plot(data_grouped_by_month.index, data_grouped_by_month[feature],
                     'o-', color=color, alpha=0.5)
            means.append(newssite_data[feature].mean())
            # Standard error of the mean: std / sqrt(n).
            errs.append(newssite_data[feature].std() / np.sqrt(len(newssite_data)))
        label_fig_xaxis_date(feature, media_names)

        plt.subplot(1, 2, 2)
        create_errorbar_graph_of_mean_feature_values(
            means, errs, media_names, feature, colors)
        plt.tight_layout()
        plt.savefig(filename.format(feature), facecolor='white')
        plt.close()
def __init__(self, ecg_signal, sample_rate, start_index=0, stop_index=None, epoch_len=25):
    """Runs algorithm from Redmond, Lovell, Basilakis, & Celler (2008) to flag regions of high- and low-quality
       ECG data.

       Original algorithm used 500Hz, single-lead ECG data recorded in 25-second segments. The stated accuracy
       is 89% sensitivity, 98% specificity, 98% PPV, and 97% NPV.

    :argument
    -ecg_signal: list/array of raw ECG data
    -sample_rate: Hz
    -start_index: corresponding to index in ecg_signal
    -stop_index: None or "end".
                 If None, segment of data will be cropped using start_index and epoch_len.
                 If "end" (in fact any non-None value), will use entire file starting from start_index
    -epoch_len: interval with which data are processed. seconds.
    """

    self.epoch_len = epoch_len
    self.sample_rate = sample_rate
    self.start_index = start_index

    if stop_index is None:
        # Crop exactly one epoch starting at start_index.
        self.stop_index = start_index + sample_rate * epoch_len
        self.raw = ecg_signal[self.start_index:self.stop_index]
    else:
        # -1 stays the stored "to the end" marker, but the slice is left
        # open-ended: slicing with -1 silently dropped the final sample.
        self.stop_index = -1
        self.raw = ecg_signal[self.start_index:]

    self.filt = Filtering.filter_signal(data=self.raw, filter_type="bandpass", filter_order=5,
                                        low_f=.7, high_f=33, sample_f=self.sample_rate)

    # Results populated by the processing methods.
    self.clipping = None
    self.highf_mask = None
    self.highf_data = None
    self.lowf_mask = None
    self.lowf_data = None
    self.final_mask = None
    self.final_mask_epoch = None

    # Default thresholds for the high- and low-frequency checks.
    self.highf_thresh = 30
    self.lowf_thresh = 10

    # NOTE(review): the original ended with a "RUNS METHODS" marker, but no
    # method calls are visible after it in this view.
def get_df_and_grouped_df(newssite, filter_date=False, min_date=None):
    '''
    Load and filter the data for ``newssite`` and also group it by author.

    The three arguments are forwarded positionally to open_and_filter_data.
    Returns a tuple ``(filtered_df, grouped_by_author_df)``.
    '''
    filtered_df = open_and_filter_data(newssite, filter_date, min_date)
    return filtered_df, Filtering.group_by_author(filtered_df)
def get_gender_percentages(data):
    """Return the female, male and unknown shares of ``data`` as fractions.

    Each value is ``len(subset) / len(data)``. The unknown subset is the rows
    whose ``author_gender`` equals 'unknown'; the female and male subsets come
    from ``Filtering.seperate_by_gender``.

    :param data: DataFrame with an ``author_gender`` column.
    :return: tuple ``(percent_female, percent_male, percent_unknown)``.
    :raises ZeroDivisionError: if ``data`` is empty.
    """
    unknown_data = data[data.author_gender == 'unknown']
    female_data, male_data = Filtering.seperate_by_gender(data)

    # Force true division: under Python 2 ``len / len`` truncates to 0 or 1.
    total = float(len(data))
    percent_unknown = len(unknown_data) / total
    percent_female = len(female_data) / total
    percent_male = len(male_data) / total
    return percent_female, percent_male, percent_unknown
def getColumnData(self, columnIndex=None, columnName=None, filters=None):
    """Return a list with the values of one column for the filtered records.

    A valid ``columnIndex`` (not None and below the number of column names)
    takes precedence over ``columnName``. ``filters`` is a tuple of the form
    (key,value,operator,bool) and is handed to Filtering.doFiltering.
    """
    index_given = columnIndex is not None
    if index_given and columnIndex < len(self.columnNames):
        columnName = self.getColumnName(columnIndex)
    matching_records = Filtering.doFiltering(searchfunc=self.filterBy,
                                             filters=filters)
    return [self.data[record][columnName] for record in matching_records]
def getColumnData(self, columnIndex=None, columnName=None, filters=None):
    """Return the data in a list for this col.

    ``columnIndex``, when given and within range of ``self.columnNames``,
    overrides ``columnName``. ``filters`` is a tuple of the form
    (key,value,operator,bool) and is passed to Filtering.doFiltering, whose
    result supplies the record names that are looked up in ``self.data``.
    """
    # Identity test for None: ``!= None`` would go through a custom __ne__.
    if columnIndex is not None and columnIndex < len(self.columnNames):
        columnName = self.getColumnName(columnIndex)
    names = Filtering.doFiltering(searchfunc=self.filterBy,
                                  filters=filters)
    coldata = [self.data[n][columnName] for n in names]
    return coldata
def open_and_filter_data(newssite, filter_date=False, min_year=None):
    '''
    Opens data and filters it.

    Reads 'feature_data/<newssite>_features.json' with pandas, prints a banner
    and the number of rows read, then returns the result of
    Filtering.filter_data(df, filter_date, min_year).
    '''
    df = pd.read_json('feature_data/{}_features.json'.format(newssite))
    # Single-argument print() calls: same output on Python 2 and Python 3.
    print('____________________{}____________________'.format(newssite))
    print('{} datapoints retreived'.format(len(df)))
    df = Filtering.filter_data(df, filter_date, min_year)
    return df
def filter_data(raw_data):
    """Return ``raw_data`` after a 5th-order .25-20Hz band-pass filter.

    The filter is run by Filtering.filter_signal with a fixed sample
    frequency of 125; progress messages are printed before and after.
    """
    print("\nRunning .25-20Hz band-pass filter...")

    bandpass_settings = dict(low_f=.25, high_f=20, filter_order=5,
                             filter_type='bandpass', sample_f=125)
    filtered = Filtering.filter_signal(data=raw_data, **bandpass_settings)

    print("Complete.")
    return filtered
def antialias_filter(self):
    """Applies 0.01 - 7.0 Hz bandpass filter to prevent aliasing.

    Filters ``self.accel30hz`` into ``self.step1_filter`` and
    ``self.mag_start_30hz`` into ``self.mag_start_step1_filter``, both with
    the same first-order settings at ``self.sample_rate``.
    """
    # STEP 1: 0.01-7Hz bandpass filtering
    print("\n" + "Applying 0.01-7Hz bandpass filter...")

    # Settings shared by both signals.
    bandpass_settings = {"type": "bandpass", "low_f": 0.01, "high_f": 7,
                         "filter_order": 1, "sample_f": self.sample_rate}

    self.step1_filter = Filtering.filter_signal(data=self.accel30hz, **bandpass_settings)
    self.mag_start_step1_filter = Filtering.filter_signal(data=self.mag_start_30hz,
                                                          **bandpass_settings)

    print("Complete.")
def filter_data(object, cutoff=.05):
    """Lowpass-filter each axis to remove its non-gravitational component.

    Reads ``object.x``, ``object.y`` and ``object.z`` and stores the
    3rd-order lowpass results on ``object.x_filt``, ``object.y_filt`` and
    ``object.z_filt``.

    :argument
    -object: EDF file object from import_devs() function
    -cutoff: cutoff frequency, Hz
    """
    for axis in ("x", "y", "z"):
        axis_filtered = Filtering.filter_signal(data=getattr(object, axis),
                                                low_f=cutoff,
                                                filter_type='lowpass',
                                                filter_order=3,
                                                sample_f=object.sample_rate)
        setattr(object, axis + "_filt", axis_filtered)
def actigraph_filter(self):
    """Applies 0.29 - 1.63 Hz bandpass filter to match on-board ActiGraph processing.

    Filters ``self.step1_filter`` into ``self.step2_filter`` and
    ``self.mag_start_step1_filter`` into ``self.mag_start_step2_filter``,
    both first-order with a fixed sample frequency of 30.
    """
    # STEP 2: 0.29-1.63Hz bandpass filtering
    print("\n" + "Applying mystical Step #2 filter...")

    # Settings shared by both signals.
    bandpass_settings = {"type": "bandpass", "low_f": 0.29, "high_f": 1.63,
                         "filter_order": 1, "sample_f": 30}

    self.step2_filter = Filtering.filter_signal(data=self.step1_filter, **bandpass_settings)
    self.mag_start_step2_filter = Filtering.filter_signal(data=self.mag_start_step1_filter,
                                                          **bandpass_settings)

    print("Complete.")
def prediction(evidence_data_add, prior, start_day, end_day):
    """Return predicted rain probabilities for ``start_day`` .. ``end_day`` inclusive.

    Starts from the last value of ``Filtering.filtering(evidence_data_add,
    prior, 100)`` and repeatedly applies ``p*0.7 + (1-p)*0.3``, appending
    each new value. The sunny probability for any day is ``1 - rain``.
    """
    # Filtered rain probability at the last observed day (day 100).
    rain_prob = Filtering.filtering(evidence_data_add, prior, 100)[-1]

    x_prob_rain = []
    for _ in range(start_day, end_day + 1):
        # One prediction step; each day starts from the previous day's value.
        # presumably 0.7 = P(rain | rain) and 0.3 = P(rain | sunny) -- confirm.
        rain_prob = rain_prob*0.7+(1-rain_prob)*0.3
        x_prob_rain.append(rain_prob)
    return x_prob_rain
def posFilter(location):
    """Apply the posted filters and return KPI and chart data as JSON.

    The filter selection is read from the request form, where it arrives as a
    JSON document used as a form *key* (the last key wins). The KPIs are
    computed from ``Filtering.applyFilters``; the chart entries are computed
    from the user's location path only.

    :param location: dataset location, prefixed here with the session username.
    :return: JSON response with KPI and chart data, or a 400 JSON error when
             the form carries no filter data.
    """
    def to_filter_date(raw_date):
        # Reverse the dash-separated parts; an empty value means "no limit".
        flipped = "-".join(raw_date.split('-')[::-1])
        return 'All' if flipped == '' else flipped

    location = session['username'] + '/' + location

    payload = None
    for key in request.form.keys():
        payload = key
    if payload is None:
        # An empty form previously left the payload unbound (NameError).
        return jsonify({'error': 'no filter data supplied'}), 400

    data_dic = json.loads(payload)
    brand = data_dic['Brand']
    city = data_dic['City']
    product = data_dic['Product']
    store = data_dic['Store']
    country = data_dic['Country']
    startdate = to_filter_date(data_dic['StartDate'])
    enddate = to_filter_date(data_dic['EndDate'])

    filteredData = Filtering.applyFilters(location, startdate, enddate,
                                          country, city, store, brand, product)

    # NOTE(review): the chart calls receive ``location``, not ``filteredData``,
    # so the charts do not reflect the filters -- confirm this is intended.
    return jsonify({
        'totalSales': KPIs.total_sales(filteredData, 1),
        'salesPerStore': KPIs.sales_per_store(filteredData, 1),
        'totalOnhandAmount': KPIs.total_onhand_amount(filteredData, 1),
        'totalOnhandUnits': KPIs.total_onhand_units(filteredData, 1),
        'averageInventoryAmount': KPIs.average_inventory_amount(filteredData, 1),
        'distinctProductsSold': KPIs.ditinct_products_sold(filteredData, 1),
        'outOfStock': KPIs.out_of_stock(filteredData, 1),
        'totalsalesPerDay': json.loads(charts.totalsales_per_day(location)),
        'totalsalesPerCountry': json.loads(charts.totalsales_per_country(location)),
        'topProducts': json.loads(charts.top_products(location)),
        'topBrands': json.loads(charts.top_brands(location)),
        'totalsalesPerCity': json.loads(charts.totalsales_per_city(location)),
        'totalsalesPerStore': json.loads(charts.totalsales_per_store(location)),
        'salesPercentageForBrand': json.loads(charts.sales_percentage_for_brand(location))
    })
def filter_epoched_data(self, col_name=None, fs=1, filter_type="lowpass", low_f=0.05, high_f=10):
    """Filter one column of ``self.df_epoch`` and store it as ``<col_name>_Filt``.

    The column is passed through Filtering.filter_signal with the given
    filter type, sample frequency and cutoffs. Negative output values are
    replaced by 0 before the new column is written.
    """
    if filter_type == "bandpass":
        print("\nFiltering {} with {}-{}Hz {} filter...".format(col_name, low_f, high_f, filter_type))
    else:
        print("\nFiltering {} with {}Hz {} filter...".format(col_name, low_f, filter_type))

    raw_filtered = Filtering.filter_signal(data=self.df_epoch[col_name],
                                           filter_type=filter_type,
                                           sample_f=fs,
                                           low_f=low_f,
                                           high_f=high_f)

    # Clamp to zero: anything that is not >= 0 becomes 0.
    clamped = []
    for value in raw_filtered:
        clamped.append(value if value >= 0 else 0)

    self.df_epoch[col_name + "_Filt"] = clamped

    print("Complete.")
def plot_gender_vs_time(data):
    """Plot yearly 'article_len' values for female and male authors and save the figure.

    ``data`` is split with Filtering.seperate_by_gender and each half is
    passed through count_by_year. The x-axis limits are taken from the
    female series only. The figure is written to 'images/gender_by_time.jpg'.
    """
    female_rows, male_rows = Filtering.seperate_by_gender(data)
    female_by_year = count_by_year(female_rows)
    male_by_year = count_by_year(male_rows)

    for yearly in (female_by_year, male_by_year):
        plt.plot(yearly.index, yearly['article_len'], 'o-')

    first_year = female_by_year.index.min()
    last_year = female_by_year.index.max()
    plt.xlim([first_year, last_year])
    plt.legend(['Female', 'Male'])
    plt.savefig('images/gender_by_time.jpg', facecolor='whitesmoke')
    plt.close()
def read_emotions(mode, filter_value):
    """Load Microsoft emotion records for the posts that pass the user filter.

    Every non-system collection of the 'ms_emotions_db' database is read.
    Collections are chosen by the character ten places from the end of their
    name: not '0' when ``mode`` is 'Fight', '0' otherwise. The concatenated
    records are then restricted to ``post_id`` values found in the ``id``
    column of the filtered Instagram posts.

    :param mode: 'Fight' or any other value (treated as the non-fight set).
    :param filter_value: forwarded to Filtering.filtering_by_users_on_photo.
    :return: DataFrame of the matching emotion records.
    """
    databases = ['ms_emotions_db']
    df_list = []
    for db_name in databases:
        client = MongoClient()
        try:
            db = client[db_name]
            # NOTE(review): collection_names() is deprecated in newer PyMongo
            # in favour of list_collection_names(); kept for compatibility
            # with the driver version this code was written against.
            collections = db.collection_names(include_system_collections=False)
            if mode == 'Fight':
                collections = [x for x in collections if x[-10] != '0']
            else:
                collections = [x for x in collections if x[-10] == '0']
            for collection_name in collections:
                collection = db[collection_name]
                df_list.append(pd.DataFrame(list(collection.find())))
        finally:
            # Release the connection; the client was previously never closed.
            client.close()

    inst_df = read_instagram_posts(mode)
    inst_df = Filtering.filtering_by_users_on_photo(inst_df, filter_value)

    df = pd.concat(df_list, ignore_index=True)
    # ``.values`` replaces ``Series.get_values()``, removed in pandas 1.0.
    df = df[df['post_id'].isin(inst_df['id'].values)]
    # df.drop(['_id', 'post_id' ], 1, inplace=True)
    return df
def import_file(self):
    """Method that loads voltage channel, sample rate, starttime, and file duration.
    Creates timestamp for each data point.

    Reads channel 0 into ``self.raw`` and, when ``self.load_accel`` is set,
    channels 1-3 into ``self.x``/``self.y``/``self.z`` plus a vector
    magnitude ``self.vm``. ``self.start_offset`` and ``self.end_offset`` are
    indexes at the channel-0 sample rate; an ``end_offset`` of 0 means "read
    to the end of the file". Also sets ``self.filtered``,
    ``self.timestamps`` and ``self.epoch_timestamps``.
    """

    t0 = datetime.now()

    print("\n" + "Importing {}...".format(self.filepath))

    def to_accel_index(ecg_index):
        # Convert a channel-0 sample index to the accelerometer sample rate.
        return int(ecg_index * self.accel_sample_rate / self.sample_rate)

    # The context manager closes the EDF file even if a read fails; the
    # reader was previously left open.
    with pyedflib.EdfReader(self.filepath) as file:
        sample_rates = file.getSampleFrequencies()
        self.sample_rate = sample_rates[0]
        self.accel_sample_rate = sample_rates[1]

        # READS IN ECG DATA ===================================================
        ecg_window = {"start": self.start_offset}
        read_to_end = self.end_offset == 0

        if read_to_end:
            print("Importing file from index {} to the end...".format(self.start_offset))
        else:
            print("Importing file from index {} to {}...".format(
                self.start_offset, self.start_offset + self.end_offset))
            ecg_window["n"] = self.end_offset

        self.raw = file.readSignal(chn=0, **ecg_window)

        if self.load_accel:
            accel_window = {"start": to_accel_index(self.start_offset)}
            if not read_to_end:
                accel_window["n"] = to_accel_index(self.end_offset)
            self.x = file.readSignal(chn=1, **accel_window)
            self.y = file.readSignal(chn=2, **accel_window)
            self.z = file.readSignal(chn=3, **accel_window)

        # Header values must be read while the file is still open.
        self.starttime = file.getStartdatetime() + timedelta(
            seconds=self.start_offset / self.sample_rate)
        self.file_dur = round(file.getFileDuration() / 3600, 3)

    # Calculates gravity-subtracted vector magnitude. Converts from mg to G
    # Negative values become zero
    if self.load_accel:
        self.vm = (np.sqrt(
            np.square(np.array([self.x, self.y, self.z])).sum(axis=0)) - 1000) / 1000
        self.vm[self.vm < 0] = 0

    print("ECG data import complete.")

    # Data filtering
    self.filtered = Filtering.filter_signal(data=self.raw,
                                            low_f=self.low_f,
                                            high_f=self.high_f,
                                            type=self.f_type,
                                            sample_f=self.sample_rate,
                                            filter_order=3)

    # TIMESTAMP GENERATION ====================================================
    t0_stamp = datetime.now()

    print("\n" + "Creating timestamps...")

    end_time = self.starttime + timedelta(seconds=len(self.raw) / self.sample_rate)
    self.timestamps = np.asarray(
        pd.date_range(start=self.starttime, end=end_time, periods=len(self.raw)))
    # A slice step must be an integer; int() is a no-op when the sample rate
    # is already integral and guards against a float rate from the reader.
    self.epoch_timestamps = self.timestamps[::int(self.epoch_len * self.sample_rate)]

    # total_seconds() keeps the sub-second part that ``.seconds`` discarded,
    # so rounding to two decimals is meaningful.
    stamp_time = (datetime.now() - t0_stamp).total_seconds()
    print("Complete ({} seconds).".format(round(stamp_time, 2)))

    proc_time = (datetime.now() - t0).total_seconds()
    print("\n" + "Import complete ({} seconds).".format(round(proc_time, 2)))
def arithmetic():
    """Filter the salt-and-pepper test image with the "arithmetic" mode and save it.

    Reads "./3_SaltAndPepperNoise.png" with imread flag 0, applies
    Filtering.filter2d with a 5x5 all-ones kernel, and writes
    "3_SaltAndPepperNoise_arithmetic.png".
    """
    kernel_size = 5
    source = cv2.imread("./3_SaltAndPepperNoise.png", 0)
    ones_kernel = [[1] * kernel_size for _ in range(kernel_size)]
    filtered = Filtering.filter2d(source, ones_kernel, "arithmetic")
    cv2.imwrite("3_SaltAndPepperNoise_arithmetic.png", filtered)
def median():
    """Filter the salt-and-pepper test image with the "median" mode and save it.

    Reads "./3_SaltAndPepperNoise.png" with imread flag 0, applies
    Filtering.filter2d with a 5x5 all-ones kernel, and writes
    "3_SaltAndPepperNoise_median.png".
    """
    kernel_size = 5
    source = cv2.imread("./3_SaltAndPepperNoise.png", 0)
    ones_kernel = [[1] * kernel_size for _ in range(kernel_size)]
    filtered = Filtering.filter2d(source, ones_kernel, "median")
    cv2.imwrite("3_SaltAndPepperNoise_median.png", filtered)
def Min():
    """Filter the salt-and-pepper test image with the "min" mode and save it.

    Reads "./3_SaltAndPepperNoise.png" with imread flag 0, applies
    Filtering.filter2d with a 3x3 all-ones kernel, and writes
    "3_SaltAndPepperNoise_min.png".
    """
    img = cv2.imread("./3_SaltAndPepperNoise.png", 0)
    # range() replaces the Python-2-only xrange(); the kernel is identical.
    img_filter = [[1 for j in range(3)] for i in range(3)]
    new_img = Filtering.filter2d(img, img_filter, "min")
    cv2.imwrite("3_SaltAndPepperNoise_min.png", new_img)
def median():
    """Filter the Gaussian-noise test image with the "median" mode and save it.

    Reads "./1_GaussianNoise.png" with imread flag 0, applies
    Filtering.filter2d with a 5x5 all-ones kernel, and writes
    "1_GaussianNoise_median.png".
    """
    kernel_size = 5
    source = cv2.imread("./1_GaussianNoise.png", 0)
    ones_kernel = [[1] * kernel_size for _ in range(kernel_size)]
    filtered = Filtering.filter2d(source, ones_kernel, "median")
    cv2.imwrite("1_GaussianNoise_median.png", filtered)
def updateImage():
    """Re-run the lane-detection pipeline on the current frame and refresh both windows.

    Reads the trackbar positions, runs the Canny and Hough steps on the
    module-level ``grayscaled_image``, clusters the resulting lines, updates
    the four module-level lane variables through Operations.DetermineLanes,
    and shows the annotated images in WINDOW_CANNY and WINDOW_HOUGH.
    Returns nothing.
    """
    global grayscaled_image
    # GET SLIDER BAR VALUES
    canny_thresh_low = cv2.getTrackbarPos(NAME_CANNY_LOWER, WINDOW_CANNY)
    canny_thresh_high = cv2.getTrackbarPos(NAME_CANNY_UPPER, WINDOW_CANNY)
    hough_thresh = cv2.getTrackbarPos(NAME_HOUGH_THRESHOLD, WINDOW_HOUGH)
    horizontal_thresh = cv2.getTrackbarPos(NAME_HORIZON_SLIDER, WINDOW_CANNY)
    # CANNY FILTER
    canny_img = Filtering.CannyFilter(grayscaled_image, canny_thresh_low, canny_thresh_high)
    # REMOVE ABOVE HORIZON
    # horizon_offset_origin is read from module scope; it is not defined in this function.
    removed_horizon_img, horizontal_line = Operations.RemoveAboveHorizon(canny_img, horizontal_thresh - horizon_offset_origin)
    # Convert to 3 channels so the horizon line can be drawn in colour.
    removed_horizon_img_w_lines = np.copy(cv2.cvtColor(removed_horizon_img, cv2.COLOR_GRAY2BGR))
    # horizontal_line is indexed as (x1, y1, x2, y2) for the two end points.
    cv2.line(removed_horizon_img_w_lines, (horizontal_line[0], horizontal_line[1]), (horizontal_line[2], horizontal_line[3]), (0, 0, 255), 2)
    cv2.imshow(WINDOW_CANNY, removed_horizon_img_w_lines)
    # HOUGH FILTER
    lines = Filtering.HoughFilter(removed_horizon_img, hough_thresh)
    # all_lines_image = Draw.DrawHoughLinesOnImage(lines, rgb_image, (0, 0, 255))
    # SEPARATE STREETS, GET THE LINES ASSOCIATED WITH YOUR LANE ONLY, BY ANGLE
    street_lines = Operations.SeparateStreets(lines)
    # lane_separated_image = Draw.DrawHoughLinesOnImage(street_lines, rgb_image, (0, 0, 255))
    # CLUSTER INTO LEFT AND RIGHT LANE (HOPEFULLY)
    # Two-level clustering: split once, then split each half again.
    cluster1, cluster2 = Operations.ClusterHoughPoints(street_lines)
    cluster11, cluster12 = Operations.ClusterHoughPoints(cluster1)
    # lane11 = Operations.GetLaneFromStdDeviation(cluster11)
    # lane12 = Operations.GetLaneFromStdDeviation(cluster12)
    cluster21, cluster22 = Operations.ClusterHoughPoints(cluster2)
    # lane21 = Operations.GetLaneFromStdDeviation(cluster21)
    # lane22 = Operations.GetLaneFromStdDeviation(cluster22)
    # FIND AND DRAW LINES ASSOCIATED WITH STREETS
    rgb_image = np.copy(cv2.cvtColor(grayscaled_image, cv2.COLOR_GRAY2BGR))
    # Each of the four sub-clusters is drawn in its own colour.
    street_lines_image = Draw.DrawHoughLinesOnImage(cluster11, rgb_image, (255, 255, 0))
    street_lines_image = Draw.DrawHoughLinesOnImage(cluster12, street_lines_image, (255, 0, 255))
    street_lines_image = Draw.DrawHoughLinesOnImage(cluster21, street_lines_image, (255, 0, 0))
    street_lines_image = Draw.DrawHoughLinesOnImage(cluster22, street_lines_image, (0, 0, 255))
    # FIND AND DRAW LANES
    # The previous lane values are passed in and the returned values replace them.
    global left_outer, left_inner, right_inner, right_outer
    left_outer, left_inner, right_inner, right_outer = Operations. \
        DetermineLanes(cluster1, cluster2, left_outer, left_inner, right_inner, right_outer)
    lanes_image = np.copy(rgb_image)
    # lanes_image = Draw.DrawLaneOnImage(left_lane, lanes_image, (255, 0, 0))
    # lanes_image = Draw.DrawLaneOnImage(right_lane, lanes_image, (255, 0, 255))
    lanes_image = Draw.DrawLaneOnImage(left_outer, lanes_image, (255, 0, 0))
    lanes_image = Draw.DrawLaneOnImage(left_inner, lanes_image, (255, 255, 0))
    lanes_image = Draw.DrawLaneOnImage(right_outer, lanes_image, (0, 255, 0))
    lanes_image = Draw.DrawLaneOnImage(right_inner, lanes_image, (0, 0, 255))
    # DRAW CENTER LINE IMAGE FOR FRAME
    # NOTE(review): `/` yields a float on Python 3 -- confirm Draw.DrawCenterLine accepts a non-integer x.
    lanes_w_center_line = Draw.DrawCenterLine(lanes_image, np.shape(lanes_image)[1] / 2, (0, 255, 0))
    # GET AND DRAW CENTER LINE FOR LANE X INTERCEPTS
    center_point_x, x_left, x_right = Operations.GetCenterPointBetweenLanes(left_inner, right_inner, lanes_w_center_line)
    lanes_w_center_line = Draw.DrawCenterLine(lanes_w_center_line, center_point_x, (255, 255, 0))
    # DISPLAY PERCENT FROM LEFT AND RIGHT LANE ON IMAGE
    image_width = np.shape(lanes_w_center_line)[1]
    image_height = np.shape(lanes_w_center_line)[0]
    # Offset of the frame centre from each lane intercept, relative to the lane width.
    # NOTE(review): x_right == x_left divides by zero here -- confirm it cannot occur.
    percent_from_left = (image_width / 2 - x_left) / (x_right - x_left)
    percent_from_right = (image_width / 2 - x_right) / (x_right - x_left)
    left_text = 'Left Lane = ' + str(int(percent_from_left * 100))
    right_text = 'Right Lane = ' + str(int(percent_from_right * 100))
    # Both labels are placed 3 pixels above the bottom edge, at x=0 and x=300.
    cv2.putText(lanes_w_center_line, left_text, (0, image_height - 3), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), thickness=2)
    cv2.putText(lanes_w_center_line, right_text, (300, image_height - 3), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), thickness=2)
    # SHOW HOUGH LINE IMAGES
    images = np.array([street_lines_image, lanes_w_center_line])
    smaller_images = Draw.ScaleAndStackImages(images, 0.5)
    cv2.imshow(WINDOW_HOUGH, smaller_images)
def high_freq_mask(self, threshold=30):
    """Flag one-second stretches of ``self.raw`` with high-frequency content.

    Steps performed:
        -60Hz notch filter (5th order)
        -40Hz highpass filter (5th order)
        -Filtered data are squared
        -Lowpass filter with ``low_f=.1``, standing in for the paper's
         ".05-second normalized Hamming window filter"
        -Square root is taken to return to original units
        -Samples above ``threshold`` are flagged with 1, all others with 0
        -Within each ``sample_rate``-long window, if any sample is 0 the
         whole window is set to 0

    Stores the mask in ``self.highf_mask``, the square-rooted signal in
    ``self.highf_data`` and ``threshold`` in ``self.highf_thresh``.
    """

    print("\nChecking high-frequency content...")

    self.highf_thresh = threshold
    samples_per_second = self.sample_rate

    # 60Hz notch filter
    notched = Filtering.filter_signal(data=self.raw, notch_f=60, filter_type='notch',
                                      sample_f=samples_per_second, filter_order=5)

    # 40Hz 5th order highpass filter
    highpassed = Filtering.filter_signal(data=notched, filter_order=5, filter_type='highpass',
                                         high_f=40, sample_f=samples_per_second)

    # Signal power: square the high-passed data.
    power = highpassed * highpassed

    # Smoothing step. The original author was unsure how to reproduce the
    # Hamming-window filter and used this lowpass (low_f=.1) instead.
    smoothed = Filtering.filter_signal(data=power, filter_order=5, filter_type='lowpass',
                                       low_f=.1, sample_f=samples_per_second)

    # Back to original units.
    amplitude = np.sqrt(smoothed)

    # 1 where the sample is not <= threshold, else 0.
    flags = np.array([0 if sample <= threshold else 1 for sample in amplitude])

    # A window stays flagged only if every sample in it is flagged.
    for window_start in range(0, len(flags), samples_per_second):
        window_stop = window_start + samples_per_second
        if 0 in flags[window_start:window_stop]:
            flags[window_start:window_stop] = 0

    self.highf_mask = flags
    self.highf_data = amplitude

    print("Complete.")
def ms_emo_significant_analysis(sig_value):
    """Print the share of Microsoft rows that pass each emotion's significance filter.

    Both frames are multiplied by 100 in place before filtering. For anger,
    contempt, disgust, fear, sadness and happiness the printed ratio is
    ``len(filtered frame) / len(full frame)``, where the filtered frame comes
    from ``Filtering.filtering_by_significant_<emotion>_emo_value``.

    :param sig_value: threshold forwarded unchanged to each Filtering function.
    :raises ZeroDivisionError: if either loaded frame is empty.
    """
    fight_emo_df = LoadingData.read_microsoft_emotions_to_dataframe(
        'Fight', include_non_emotions_column=True)
    normal_emo_df = LoadingData.read_microsoft_emotions_to_dataframe(
        'Normal', include_non_emotions_column=True)

    # Rescale every column by 100 before applying the threshold.
    fight_emo_df *= 100
    normal_emo_df *= 100

    separator = '----------------------------------'

    # Single-argument print() calls give the same text on Python 2 and 3.
    print('len(fight_df)= {}'.format(len(fight_emo_df)))
    print('len(normal_df)= {}'.format(len(normal_emo_df)))
    print(separator)

    for emotion in ('anger', 'contempt', 'disgust', 'fear', 'sadness', 'happiness'):
        significant_filter = getattr(
            Filtering, 'filtering_by_significant_{}_emo_value'.format(emotion))
        sig_fight_df = significant_filter(fight_emo_df, sig_value)
        sig_normal_df = significant_filter(normal_emo_df, sig_value)
        print('len(fight_df)[{} sig.val={}] = {}'.format(
            emotion, sig_value, float(len(sig_fight_df)) / len(fight_emo_df)))
        print('len(normal_df)[{} sig.val={}] = {}'.format(
            emotion, sig_value, float(len(sig_normal_df)) / len(normal_emo_df)))
        print(separator)
def harmonic():
    """Filter the salt-noise test image with the "harmonic" mode and save it.

    Reads "./2_SaltNoise.png" with imread flag 0, applies Filtering.filter2d
    with a 3x3 all-ones kernel, and writes "2_SaltNoise_harmonic.png".
    """
    img = cv2.imread("./2_SaltNoise.png", 0)
    # range() replaces the Python-2-only xrange(); the kernel is identical.
    img_filter = [[1 for j in range(3)] for i in range(3)]
    new_img = Filtering.filter2d(img, img_filter, "harmonic")
    # The output keeps the input's "2_" prefix, like the sibling functions do;
    # the original wrote "1_SaltNoise_harmonic.png", which did not match.
    cv2.imwrite("2_SaltNoise_harmonic.png", new_img)
import preprocesstweets
import Filtering
import csv
import nltk
import nltk.classify

# NOTE(review): stopWords is loaded but never passed on -- confirm whether
# Filtering.getFeatureVector is meant to receive it.
stopWords = Filtering.getStopWordList('stopwords.txt')
featureList = []

# Get tweet words: one (feature vector, sentiment) pair per CSV row.
tweets = []
# Read the tweets one by one and process them; the context manager closes
# the CSV file, which was previously left open.
with open('full_training_dataset5.csv', 'r') as inpTweets:
    reader = csv.reader(inpTweets, delimiter=',')
    for row in reader:
        print(row)
        sentiment = row[0]
        tweet = row[1]
        processedTweet = preprocesstweets.processTweet(tweet)
        featureVector = Filtering.getFeatureVector(processedTweet)
        featureList.extend(featureVector)
        tweets.append((featureVector, sentiment))

# Remove featureList duplicates
featureList = list(set(featureList))


def extract_features(tweet_words):
    """Map a tweet's word list to a {'contains(word)': bool} feature dict."""
    present = set(tweet_words)
    return {'contains(%s)' % word: (word in present) for word in featureList}


# Extract feature vectors for all tweets in one shot. apply_features needs a
# callable feature function; the original passed the string
# 'feature_list.txt', which cannot be called.
training_set = nltk.classify.util.apply_features(extract_features, tweets)
NBClassifier = nltk.NaiveBayesClassifier.train(training_set)
def plot_features_by_gender(data, filename, colors):
    """Plot every feature over time for female versus male authors.

    Splits ``data`` with Filtering.seperate_by_gender, prints the combined
    row count of the two halves, and hands them to plot_feature_vs_date
    under the labels 'female' and 'male'.

    :param data: DataFrame to split by gender.
    :param filename: passed through to plot_feature_vs_date.
    :param colors: passed through to plot_feature_vs_date.
    """
    female_df, male_df = Filtering.seperate_by_gender(data)
    # Single-argument print(): same output on Python 2 and Python 3.
    print('number of data: {}'.format(len(male_df) + len(female_df)))
    plot_feature_vs_date([female_df, male_df], ['female', 'male'],
                         filename, colors)
def geometric():
    """Filter the Gaussian-noise test image with the "geometric" mode and save it.

    Reads "./1_GaussianNoise.png" with imread flag 0, applies
    Filtering.filter2d with a 5x5 all-ones kernel, and writes
    "1_GaussianNoise_geometric.png".
    """
    img = cv2.imread("./1_GaussianNoise.png", 0)
    # range() replaces the Python-2-only xrange(); the kernel is identical.
    img_filter = [[1 for j in range(5)] for i in range(5)]
    new_img = Filtering.filter2d(img, img_filter, "geometric")
    cv2.imwrite("1_GaussianNoise_geometric.png", new_img)
def calc_descriptive(annotations, signal, folder, subfolder):
    """Function that calculates descriptive statistics and runs cumulative FFT on raw and Wiener-filtered data
       for each section of specified data.

       Segments shorter than ``fs * 5`` samples get None for all four
       statistics and contribute nothing to the FFT frames. ``fs`` is read
       from module scope.

        :argument
        -annotations: dataframe of signal quality annotations. Four columns (range_AWWF, sd_AWWF, range_raw,
                      sd_raw) are added to it in place.
        -signal: dataframe with "signal_raw" and "signal_AWWF" columns
        -folder: pathway to subject's folder
        -subfolder: name of subject's specific collection folder that contains relevant files

        :returns
        -(df_fft_raw, df_fft_w): cumulative FFT dataframes for the .25Hz highpassed raw data and for the
                                 Wiener-filtered data
    """

    fft_columns = ["Percent", "Freq", "ID", "category", "event"]

    # Empty data to be populated
    range_list_f = []  # voltage range of Wiener-filtered data
    sd_list_f = []  # voltage SD of Wiener-filtered data
    range_list = []  # voltage range of raw data (.25Hz highpass filtered)
    sd_list = []  # voltage SD of raw data (.25Hz highpass filtered)

    # Cumulative FFT dataframe for raw data
    df_fft_raw = pd.DataFrame([[], [], [], [], []]).transpose()
    df_fft_raw.columns = list(fft_columns)

    # Cumulative FFT dataframe for Wiener-filtered data
    df_fft_w = pd.DataFrame([[], [], [], [], []]).transpose()
    df_fft_w.columns = list(fft_columns)

    # Per-segment frames are collected and concatenated once at the end;
    # DataFrame.append was removed in pandas 2.0.
    raw_parts = [df_fft_raw]
    w_parts = [df_fft_w]

    # Loops through each flagged data segment
    for row in annotations.itertuples():
        d = signal.iloc[int(row.start_idx):int(row.end_idx)]  # Subsection of data

        # Only includes segments of data at least 5-seconds long
        if d.shape[0] < (fs*5):
            range_list_f.append(None)
            sd_list_f.append(None)
            range_list.append(None)
            sd_list.append(None)
            continue

        # Wiener filtered
        desc_w = d["signal_AWWF"].describe()
        range_list_f.append(desc_w["max"] - desc_w['min'])
        sd_list_f.append(desc_w["std"])

        # Highpass filtered to remove baseline wander. Computed once and
        # reused for both the statistics and the FFT.
        highpassed = Filtering.filter_signal(data=d["signal_raw"], sample_f=fs, high_f=.25,
                                             filter_order=3, filter_type='highpass')
        desc_raw = pd.Series(highpassed).describe()
        range_list.append(desc_raw["max"] - desc_raw['min'])
        sd_list.append(desc_raw["std"])

        # Cumulative FFT data: calculates frequencies that account for
        # np.arange(10, 101, 10)% of signal power
        segment_id = f"{folder}_{subfolder}"
        event_number = row.Index + 1

        # FFT data: raw (.25Hz highpass)
        raw, cs_raw = cumulative_fft(signal=highpassed)
        cs_raw["ID"] = [segment_id] * cs_raw.shape[0]
        cs_raw["category"] = [row.quality] * cs_raw.shape[0]
        cs_raw["event"] = [event_number] * cs_raw.shape[0]
        raw_parts.append(cs_raw)

        # FFT data: Wiener filtered data
        w, cs_w = cumulative_fft(signal=d["signal_AWWF"])
        cs_w["ID"] = [segment_id] * cs_w.shape[0]
        cs_w["category"] = [row.quality] * cs_w.shape[0]
        cs_w["event"] = [event_number] * cs_w.shape[0]
        w_parts.append(cs_w)

    # Same defaults DataFrame.append used: original indexes kept, no sorting.
    df_fft_raw = pd.concat(raw_parts, sort=False)
    df_fft_w = pd.concat(w_parts, sort=False)

    # Adds columns with descriptive stats to annotations df
    annotations["range_AWWF"] = range_list_f
    annotations['sd_AWWF'] = sd_list_f
    annotations["range_raw"] = range_list
    annotations['sd_raw'] = sd_list

    return df_fft_raw, df_fft_w
def distrib_graphs(mode, filter_value,
                   output_prefix='/media/vasiliy/66E473BDE4738DD5/StadiumProject/Graphs/Microsoft/Emotions_distribution/Filtered_by_users/gh'):
    """Save one histogram PNG per emotion column for the given mode.

    Instagram posts for ``mode`` are filtered with
    ``Filtering.filtering_by_users_on_photo(df, filter_value)``; only emotion
    rows whose ``post_id`` belongs to a remaining post are kept. Every column
    left after dropping ``_id`` and ``post_id`` is scaled by 100 and drawn as
    a histogram.

    :param mode: dataset name passed to the loaders; ``'Fight'`` is drawn in
        crimson (``#DC143C``), any other value in navy (``#000080``).
    :param filter_value: value handed to the user filter; also part of the
        output directory name.
    :param output_prefix: path prefix of the output directory. Files are
        written to ``output_prefix + str(filter_value) + '/' + mode + '_' +
        <column> + '.png'``. The directory must already exist.
    """
    df = LoadingData.read_instagram_posts_to_dataframe(mode)
    emo_df = LoadingData.read_microsoft_emotions_to_dataframe(mode, include_non_emotions_column=True)
    df = Filtering.filtering_by_users_on_photo(df, filter_value)

    # ``.values`` instead of ``get_values()``, which was removed in pandas 1.0.
    emo_df = emo_df[emo_df['post_id'].isin(df['id'].values)]
    data = emo_df.drop(['_id', 'post_id'], axis=1)

    # The colour is the only thing that depends on the mode; every column is
    # plotted the same way, so no per-emotion branching is needed.
    color = '#DC143C' if mode == 'Fight' else '#000080'

    for sign in list(data.columns):
        # Multiplying into a new array leaves the dataframe itself untouched.
        distr_data = data[sign].values * 100

        f, axis = plt.subplots(1, 1, sharex=True)
        try:
            plt.title(sign)
            axis.hist(distr_data, color=color)
            f.savefig(
                output_prefix + str(filter_value) + '/' + mode + '_' + sign + '.png',
                format='png', orientation='landscape')
        finally:
            # Release the figure; otherwise one figure per column stays open.
            plt.close(f)
def contraharmonic_Negative():
    """Run the "contraharmonic" filter with parameter -1.5 on the salt-noise image.

    Reads ``./2_SaltNoise.png`` with imread flag 0, passes it together with a
    3x3 all-ones kernel to ``Filtering.filter2d`` and writes the result to
    ``1_SaltNoise_contraharmonic_negative.png`` in the working directory.
    """
    # NOTE(review): cv2.imread returns None instead of raising when the file
    # cannot be read; nothing here checks for that.
    img = cv2.imread("./2_SaltNoise.png", 0)

    # 3x3 all-ones kernel. ``range`` replaces ``xrange``, which does not
    # exist on Python 3; the resulting list is the same on both versions.
    img_filter = [[1 for _ in range(3)] for _ in range(3)]

    # -1.5 is presumably the filter order Q; confirm against Filtering.filter2d.
    new_img = Filtering.filter2d(img, img_filter, "contraharmonic", -1.5)

    # NOTE(review): the input file is prefixed "2_" but the output "1_";
    # verify the output name is intentional.
    cv2.imwrite("1_SaltNoise_contraharmonic_negative.png", new_img)
def filterChannels(channels):
    """Pass every truthy channel except the first to ``Filtering.filter``.

    The element at index 0 is always skipped, and falsy entries (``None``,
    empty containers, 0, ...) are ignored. Nothing is returned.
    """
    truthy_tail = (entry for entry in channels[1:] if entry)
    for entry in truthy_tail:
        Filtering.filter(entry)
ecg[int(row.start_idx):int(row.end_idx)] = value return ecg data = ImportEDF.Bittium(filepath="/Users/kyleweber/Desktop/007_OmegaSnap.EDF", start_offset=0, end_offset=0, epoch_len=15, load_accel=False, low_f=1, high_f=25, f_type="bandpass") f = Filtering.filter_signal(data=data.raw, filter_type='bandpass', low_f=.5, high_f=30, filter_order=3, sample_f=data.sample_rate) # No bandpass/notch filtering w = wiener_filter.awwf(f, data.sample_rate) filt, wiener_filt, snr, annots, thresh = qc.annotate_ecg_quality( sample_rate=data.sample_rate, signal=data.raw) annots._annotations["quality"] = [ row.quality.value for row in annots._annotations.itertuples() ] df_annots = annots._annotations.copy() # .05Hz lowpass filter on SNR data snr_filt = Filtering.filter_signal(data=snr,
def low_freq_mask(self, threshold=10):
    """Flags periods of at least 3 seconds where the low-frequency power measure is at or below threshold.

    Procedure:
        -Uses self.filt (presumably the .7-33Hz bandpass-filtered signal computed elsewhere - confirm)
        -Signal is squared
        -Squared signal is lowpass filtered (order 5, low_f=.1); this stands in for the
         "normalized .05-second Hamming window filter" of the reference procedure
        -Square root is taken
        -Samples whose value is <= threshold are marked 1, all others 0
        -Marked runs shorter than 3 seconds are ignored

    :argument
    -threshold: samples at or below this value are flagged (default 10; units follow self.filt)

    Sets:
    -self.lowf_thresh: the threshold used
    -self.lowf_mask: array with len(self.raw) entries, 1 inside flagged periods and 0 elsewhere
    -self.lowf_data: the square-rooted, lowpass-filtered power data
    """

    print("\nChecking low-frequency content...")

    self.lowf_thresh = threshold

    # Squares filtered data
    squared = self.filt * self.filt

    # Lowpass filter used in place of the Hamming window step
    hamm_data = Filtering.filter_signal(data=squared, filter_order=5, filter_type='lowpass',
                                        low_f=.1, sample_f=self.sample_rate)

    # Square root of the smoothed data
    square_root = np.sqrt(hamm_data)

    # Binary mask, compares values to threshold (NaN compares False -> 0)
    low_mask = np.where(square_root <= threshold, 1, 0)

    # Removes flagged periods less than 3 seconds long -----------------------------

    # Transitions: +1 where the mask turns on, -1 where it turns off
    edges = np.diff(low_mask)
    starts = list(np.where(edges == 1)[0] + 1)  # first flagged sample of each run
    stops = list(np.where(edges == -1)[0])  # last flagged sample of each run

    # A run already active at the first sample has no 0 -> 1 transition.
    # Without this, every start would be paired with the wrong stop.
    if len(low_mask) > 0 and low_mask[0] == 1:
        starts.insert(0, 0)

    # Adds index of final data point if collection ends with a flagged run
    if len(starts) > len(stops) and low_mask[-1] == 1:
        stops.append(len(low_mask))

    # Keeps only runs at least 3 seconds long
    indexes = [[start, stop] for start, stop in zip(starts, stops)
               if (stop - start) / self.sample_rate >= 3]

    # Sized from this instance's own data rather than a global object
    low_mask_final = np.zeros(len(self.raw))

    # Sets detected periods to flagged; slice assignment clips to the array
    # bounds, so no IndexError handling is needed
    for start, stop in indexes:
        low_mask_final[start:stop] = 1

    self.lowf_mask = low_mask_final
    self.lowf_data = square_root

    print("Complete.")
def geometric():
    """Run the "geometric" filter on the salt-and-pepper-noise image.

    Reads ``./3_SaltAndPepperNoise.png`` with imread flag 0, passes it
    together with a 3x3 all-ones kernel to ``Filtering.filter2d`` and writes
    the result to ``3_SaltAndPepperNoise_geometric.png`` in the working
    directory.
    """
    # NOTE(review): cv2.imread returns None instead of raising when the file
    # cannot be read; nothing here checks for that.
    img = cv2.imread("./3_SaltAndPepperNoise.png", 0)

    # 3x3 all-ones kernel. ``range`` replaces ``xrange``, which does not
    # exist on Python 3; the resulting list is the same on both versions.
    img_filter = [[1 for _ in range(3)] for _ in range(3)]

    new_img = Filtering.filter2d(img, img_filter, "geometric")
    cv2.imwrite("3_SaltAndPepperNoise_geometric.png", new_img)
def plot_segment(segment=None):
    """Plot one 15-second ECG segment together with its quality-check output.

    All open figures are closed first. With ``segment=None`` a row label is
    drawn at random from the module-level ``num_list`` and removed from it;
    otherwise ``segment`` is used as the row label. The row is looked up in
    the module-level ``df`` ("ID" selects the EDF file, "Index" the start
    offset).

    Four stacked axes are drawn: raw vs. bandpass-filtered signal, low
    frequency data, high frequency data (with a horizontal line at 30), and
    the final / low-frequency / high-frequency masks.

    :param segment: row label in ``df`` to plot, or None for a random one.
    :return: the ``ECG_Quality_Check.RedmondQC`` object built for the segment.
    """
    plt.close("all")

    if segment is not None:
        rando = segment
    else:
        rando = random.choice(num_list)
        num_list.remove(rando)

    edf_path = "/Users/kyleweber/Desktop/Data/OND07/EDF/OND07_WTL_{}_01_BF.EDF".format(df["ID"].loc[rando])
    data = ImportEDF.Bittium(filepath=edf_path,
                             start_offset=df["Index"].loc[rando],
                             end_offset=int(15 * 250),
                             epoch_len=15,
                             load_accel=False,
                             low_f=1, high_f=30, f_type="bandpass")

    filt = Filtering.filter_signal(data=data.raw, sample_f=data.sample_rate,
                                   low_f=.6, high_f=30, filter_type='bandpass')

    r = ECG_Quality_Check.RedmondQC(ecg_signal=data.raw, sample_rate=data.sample_rate,
                                    start_index=0, stop_index=None, epoch_len=data.epoch_len)

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, sharex='col', figsize=(10, 6))

    # Share of epochs labelled "Valid", as a percentage with two decimals
    valid_pct = round(r.final_mask.count("Valid") * 100 / len(r.final_mask), 2)
    ax1.set_title("Row {}, {} % valid".format(rando, valid_pct))

    # Time axis in seconds, shared by every trace as long as the raw signal
    seconds = np.arange(0, len(data.raw)) / data.sample_rate

    # Panel 1: raw signal against its bandpass-filtered version
    ax1.plot(seconds, data.raw, color='red', label="Raw")
    ax1.plot(seconds, filt, color='black', label="Filt")
    ax1.legend()

    # Panel 2: low frequency data
    ax2.plot(seconds, r.lowf_data, color='dodgerblue', label="Low F Data")
    ax2.legend()

    # Panel 3: high frequency data with a reference line at 30
    ax3.plot(seconds, r.highf_data, color='green', label='High F Data')
    ax3.axhline(y=30, color='black')
    ax3.legend()

    # Panel 4: the three masks, each on a time axis of its own length
    for mask_name, colour in (("final_mask", 'black'),
                              ("lowf_mask", 'dodgerblue'),
                              ("highf_mask", 'green')):
        mask = getattr(r, mask_name)
        ax4.plot(np.arange(0, len(mask)) / data.sample_rate, mask, color=colour)

    return r
import matplotlib.pyplot as plt import Filtering import nwecg.nwecg.ecg_quality as qc file = "/Volumes/nimbal$/OBI/ONDRI@Home/Device Validation Protocols/Bittium Faros/Data Files/007_OmegaSnap.EDF" nw_file = "/Volumes/nimbal$/OBI/ONDRI@Home/Device Validation Protocols/Bittium Faros/Omega Snap Testing/OmegaSnap_Nonwear.xlsx" df_nw = pd.read_excel(nw_file) df_nw = df_nw.loc[df_nw["File"] == file] """Data import""" f = pyedflib.EdfReader(file) ecg = f.readSignal(0) ecg_fs = f.getSampleFrequency(0) ecg_filt = Filtering.filter_signal(data=ecg, sample_f=ecg_fs, low_f=.5, high_f=25, filter_type="bandpass") acc = np.array([f.readSignal(1), f.readSignal(2), f.readSignal(3)]) acc_fs = f.getSampleFrequency(1) temp = f.readSignal(5) temp_fs = f.getSampleFrequency(5) start_time = f.getStartdatetime() f.close() def plot_all_data():