def peak_detection_example(ch, experiment):
    """ Peak detection example. """
    ch_data, unit = experiment.get_channel_data(ch)
    ch_data = split(ch_data, experiment.sample_rate, 1000)
    ch_data = ch_data[12]
    dp.detect_peaks(ch_data, show=True)

def peak_detection_summary_example(ch, experiment):
    """ Peak detection example, plotting the number of peaks in each bucket. """
    ch_data, unit = experiment.get_channel_data(ch)
    ch_data = split(ch_data, experiment.sample_rate, 1000)
    threshold = 15 * 1e-6
    peaks = [len(dp.detect_peaks(x, mph=threshold)) for x in ch_data]
    valleys = [
        len(dp.detect_peaks(x, valley=True, mph=-threshold)) for x in ch_data
    ]
    plt.plot(peaks)
    plt.plot(valleys)
    plt.show()

def get_lock_points(self, c):
    """ Find possible lock points """
    if self.is_locked:
        raise Exception(
            'Cannot get lock points while being locked. Unlock first')
    # zoom out
    yield self.spectrum_analyzer.frequency_range(*self.capture_range)
    # find peaks
    trace = yield self.spectrum_analyzer.trace()
    trace = np.array(trace)
    average = np.average(trace)
    positions = detect_peaks(trace,
                             mph=average + self.peak_threshold,
                             mpd=10)
    powers = trace[positions]
    df = (self.capture_range[1] - self.capture_range[0]) / len(trace)
    frequencies = np.array(positions) * df + self.get_capture_range(c)[0]
    defer.returnValue([frequencies, powers])

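# Hedged standalone sketch (synthetic trace, hypothetical capture range) of the
# index-to-frequency mapping used in get_lock_points() above: detect_peaks returns
# bin indices, which are scaled by the frequency step df and offset by the start of
# the capture window. The import path for detect_peaks is an assumption.
def lock_point_sketch():
    import numpy as np
    from detect_peaks import detect_peaks  # assumed import path

    capture_range = (80e6, 120e6)          # hypothetical capture window in Hz
    trace = np.full(400, -60.0)            # flat noise floor in dBm
    trace[[50, 210]] = [-20.0, -25.0]      # two synthetic carriers
    # peaks must clear the average by 10 dB and be at least 10 bins apart
    positions = detect_peaks(trace, mph=np.average(trace) + 10, mpd=10)
    powers = trace[positions]
    df = (capture_range[1] - capture_range[0]) / len(trace)
    frequencies = positions * df + capture_range[0]
    return list(zip(frequencies, powers))
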
def interpret(type):
    datapath = './processes/monthly/dataset/' + type + '/'
    interpret_datapath = './processes/monthly/postproc/' + type + '/'
    result_file = datapath + 'result.json'
    with open(result_file) as data_file:
        result_data = json.load(data_file)
    all_results = []
    for group in result_data:
        pick_list = []
        pick_months = []
        pick_toparticles = []
        peak_indexes_months = []
        pick_theme = ' '.join(group['theme'])
        print('Interpreter: theme: ', pick_theme)
        for item in group['groups']:
            all_documents_title = []
            all_documents_text = []
            # build a Whoosh index of article titles for this month
            schema = Schema(title=TEXT(stored=True), path=ID(stored=True))
            os.makedirs(interpret_datapath + "indexes", exist_ok=True)
            ix = create_in(interpret_datapath + "indexes", schema)
            writer = ix.writer()
            for article in item['articles']:
                writer.add_document(title=article['title'],
                                    path=article['_id'])
            writer.commit()
            # rank the indexed titles against the cluster theme
            with ix.searcher() as searcher:
                query = whoosh.qparser.QueryParser(
                    "title", ix.schema,
                    group=whoosh.qparser.OrGroup).parse(pick_theme)
                results = searcher.search(query, limit=50)
                if len(results):
                    article_pick = results
                    print('Clustering: index length:')
                    print(len(article_pick))
                    for a in article_pick:
                        all_documents_title.append(a['title'])
                        if type != 'trumpsaid':
                            all_documents_text.append(a['content'])
            pick_list.append(len(item['articles']))
            pick_months.append(item['month'])
            print(pick_list, pick_months, len(all_documents_title))
            if type == 'trumpsaid':
                all_documents_dict = all_documents_title
            else:
                all_documents_dict = dict(
                    zip(all_documents_title, all_documents_text))
            if len(all_documents_dict) <= 12:
                # too few matched documents to cluster for this month
                pick_toparticles.append([])
                print('@@@@@@@@ ERRRR')
                continue
            else:
                # cluster the matched documents and extract key terms
                if type == 'trumpsaid':
                    parsed_data = vectorize_cluster(all_documents_dict, 2, 15,
                                                    interpret_datapath,
                                                    'titleonly')
                else:
                    parsed_data = vectorize_cluster(all_documents_dict, 2, 13,
                                                    interpret_datapath)
                print('+++++++ PARSING')
                parsed_data_themes = []
                for index, parseditem in enumerate(parsed_data[0]):
                    converted_np = np.array(parsed_data[0][index]).tolist()
                    tokens = []
                    for token in converted_np:
                        tokens.append(token[0])
                    parsed_data_themes.append(tokens)
                # print('Interpreter: all key themes:')
                # print(parsed_data_themes)
                stemmer = PorterStemmer()

                def stem_words(words_list, stemmer):
                    return [stemmer.stem(word) for word in words_list]

                # map stemmed key terms back to the surface forms found in titles
                for index, themeitem in enumerate(parsed_data_themes):
                    zip_data = {}
                    theme_data = []
                    for token in themeitem:
                        title_tokens = nltk.word_tokenize(' '.join(
                            parsed_data[1][index]))
                        title_stems = stem_words(title_tokens, stemmer)
                        title_token_zip = dict(zip(title_stems, title_tokens))
                        matched_token = title_token_zip.get(token)
                        # print('Target token: {0} / Matched: {1} / Tokens Dict: {2}'.format(token, matched_token, title_token_zip))
                        if isinstance(matched_token, str):
                            if len(matched_token) > 1:
                                theme_data.append(matched_token)
                        elif isinstance(matched_token, list):
                            if len(matched_token[0]) > 1:
                                theme_data.append(matched_token[0])
                        else:
                            if len(token) > 1:
                                theme_data.append(token)
                    print(theme_data)
                    # drop terms already present in the cluster theme
                    noduplicates = []
                    for theme in theme_data:
                        if theme not in group['theme']:
                            noduplicates.append(theme)
                    if len(noduplicates) <= 0:
                        querytext = ' '.join(theme_data)
                    else:
                        querytext = ' '.join(noduplicates)
                    # pick the highest-ranked articles for the extracted terms
                    article_pick = []
                    toparticles_matched = []
                    with ix.searcher() as searcher:
                        query = whoosh.qparser.QueryParser(
                            "title", ix.schema,
                            group=whoosh.qparser.OrGroup).parse(querytext)
                        results = searcher.search(query)
                        if len(results):
                            article_pick = results[0:8]
                            print(article_pick)
                            for a in article_pick:
                                for article in item['articles']:
                                    if a['path'] == article['_id']:
                                        if 'text' in article:
                                            del article['text']
                                        toparticles_matched.append(article)
                    pick_toparticles.append(toparticles_matched)
                    print(toparticles_matched)
            print('########## DONE: {0}'.format(item['month']))
        print(
            'Interpreter: Detect peaks with minimum height and distance filters.'
        )
        # months whose article counts form peaks (min height 7, min distance 2)
        peak_indexes = detect_peaks.detect_peaks(pick_list, mph=7,
                                                 mpd=2).tolist()
        print(dict(zip(pick_months, pick_list)))
        print('Interpreter: Peaks are: %s' % peak_indexes)
        for item in peak_indexes:
            print('Interpreter: Peaks are: %s' % pick_months[int(item)])
            peak_indexes_months.append(pick_months[int(item)])
        result_pick_data = {}
        result_pick_data['theme'] = group['theme']
        result_pick_data['list'] = pick_list
        result_pick_data['months'] = pick_months
        result_pick_data['toparticles'] = pick_toparticles
        result_pick_data['peaks'] = peak_indexes_months
        then = datetime.datetime.now(pytz.utc)
        timeest = str(then.astimezone(pytz.timezone('US/Eastern')))
        result_pick_data['timestamp'] = timeest
        print(result_pick_data)
        all_results.append(result_pick_data)
        print('--------- CLUSTER DONE: {0}'.format(group['theme']))
    with open(interpret_datapath + 'result.json', 'w') as f:
        json.dump(all_results, f, indent=4, sort_keys=True)

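# Hedged toy sketch (made-up counts, not from the dataset) of the peak step used in
# interpret(): detect_peaks with a minimum peak height (mph=7) and minimum peak
# distance (mpd=2) returns the indices of months whose article counts stand out.
# The module-style import mirrors the usage above; the exact path is an assumption.
def monthly_peak_sketch():
    import detect_peaks  # assumed import path

    pick_list = [3, 9, 4, 2, 11, 5, 6, 1]  # hypothetical article counts per month
    peak_indexes = detect_peaks.detect_peaks(pick_list, mph=7, mpd=2).tolist()
    return peak_indexes                    # [1, 4] for the toy counts above
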
def postprocess(type):
    us_eastern_time = pytz.timezone('US/Eastern')
    datapath = './dataset/' + type + '/'
    cluster_index_datapath = './cluster/' + type + '/'
    result_file = datapath + 'result.json'
    with open(result_file) as data_file:
        result_data = json.load(data_file)
    all_results = []
    print(
        '\n\nCLUSTER_POSTPROC: Initiated, total {0} cluster(s) for postprocessing \n'
        .format(len(result_data)))
    for group in result_data:
        pick_list = []
        pick_timefiltered = []
        pick_toparticles = []
        peak_indexes = []
        peak_indexes_items = []
        peak_indexes_time = []
        pick_theme = ' '.join(group['theme'])
        lda_theme = []
        ldathemes_combined = []
        # print('CLUSTER_POSTPROC: theme: ', pick_theme)
        for item in group['groups']:
            all_documents = []
            all_documents_title = []
            all_documents_text = []
            # index article titles (and full text for the 'today' dataset)
            if type == 'today':
                schema = Schema(title=TEXT(stored=True),
                                path=ID(stored=True),
                                content=TEXT(stored=True))
            else:
                schema = Schema(title=TEXT(stored=True), path=ID(stored=True))
            os.makedirs(cluster_index_datapath + "indexes", exist_ok=True)
            ix = create_in(cluster_index_datapath + "indexes", schema)
            writer = ix.writer()
            for article in item['articles']:
                if type == 'today':
                    writer.add_document(title=article['title'],
                                        path=article['_id'],
                                        content=article['text'])
                    all_documents.append(article)
                else:
                    writer.add_document(title=article['title'],
                                        path=article['_id'])
            writer.commit()
            # rank the indexed titles against the cluster theme
            with ix.searcher() as searcher:
                query = whoosh.qparser.QueryParser(
                    "title", ix.schema,
                    group=whoosh.qparser.OrGroup).parse(pick_theme)
                results = searcher.search(query, limit=30)
                if len(results):
                    article_pick = results
                    for a in article_pick:
                        all_documents_title.append(a['title'])
                        if type == 'today':
                            all_documents_text.append(a['content'])
            pick_list.append(len(item['articles']))
            pick_timefiltered.append(item['time_filterby'])
            print(
                ' -* CLUSTER_POSTPROC: Total: {0}, Month: {1}, Query Result: {2}'
                .format(pick_list, pick_timefiltered,
                        len(all_documents_title)))
            # pick the highest-ranked articles for the cluster theme
            querytext = ' '.join(group['theme'])
            article_pick = []
            toparticles_matched = []
            toparticles_matched_text = []
            with ix.searcher() as searcher:
                query = whoosh.qparser.QueryParser(
                    "title", ix.schema,
                    group=whoosh.qparser.OrGroup).parse(querytext)
                results = searcher.search(query)
                if len(results):
                    for a in results:
                        for article in item['articles']:
                            if a['path'] == article['_id']:
                                toparticles_matched_text.append(
                                    article['title'])
                    article_pick = results[0:5]
                    for a in article_pick:
                        for article in item['articles']:
                            if a['path'] == article['_id']:
                                if 'text' in article:
                                    del article['text']
                                toparticles_matched.append(article)
                                toparticles_matched_text.append(
                                    article['title'])
            pick_toparticles.append(toparticles_matched)
        print(
            ' -- CLUSTER_POSTPROC: Extracting features from the dataset /lda'
        )
        from nltk.corpus import stopwords
        from nltk.stem.wordnet import WordNetLemmatizer
        import string
        stop = set(stopwords.words('english'))
        exclude = set(string.punctuation)
        lemma = WordNetLemmatizer()

        def clean(doc):
            stop_free = " ".join(
                [i for i in doc.lower().split() if i not in stop])
            punc_free = ''.join(ch for ch in stop_free if ch not in exclude)
            normalized = " ".join(
                lemma.lemmatize(word) for word in punc_free.split())
            return normalized

        def clean_text(raw_text):
            letters_only = re.sub('[^a-zA-Z]', ' ', str(raw_text))
            words = letters_only.lower().split()
            cachedStopWords = set(stopwords.words("english"))
            cachedStopWords.update([
                'periscope', 'pbs', 'newshour', 'npr', 'watch', 'bloomberg',
                'says', 'abc', 'news'
            ])
            useful_words = [x for x in words if x not in cachedStopWords]
            useful_words_string = ' '.join(useful_words)
            return useful_words_string

        # run LDA over the top-ranked titles to extract three topics
        doc_clean = [
            clean_text(doc).split() for doc in toparticles_matched_text
        ]
        import gensim
        from gensim import corpora
        dictionary = corpora.Dictionary(doc_clean)
        doc_term_matrix = [dictionary.doc2bow(doc) for doc in doc_clean]
        Lda = gensim.models.ldamodel.LdaModel
        ldamodel = Lda(doc_term_matrix,
                       num_topics=3,
                       id2word=dictionary,
                       passes=50)
        # print(ldamodel.print_topics(num_topics=3, num_words=5))
        for idx, topic in ldamodel.show_topics(formatted=False,
                                               num_words=3,
                                               num_topics=3):
            lda_theme.append([w[0] for w in topic])
        for x in lda_theme:
            ldathemes_combined.extend(x)
        ldathemes_combined = np.unique(ldathemes_combined).tolist()
        print(
            ' -- CLUSTER_POSTPROC: Detect peaks with minimum height and distance filters.'
        )
        if type == 'today':
            # bucket today's articles by hour and detect activity peaks
            article_list_peak_pre = []
            for item in item['articles']:
                timeformat = '%Y-%m-%d-%H'  # mode <= 48
                if 'text' in item:
                    del item['text']
                item['time_filter'] = dt.fromtimestamp(
                    item['ts'], us_eastern_time).strftime(timeformat)
                article_list_peak_pre.append(item)
            article_list_peak_list = []
            article_list_peak_items = []
            article_list_peak_timekey = []
            sorted_articles = sorted(article_list_peak_pre,
                                     key=itemgetter('time_filter'))
            for key, gp in itertools.groupby(sorted_articles,
                                             key=lambda x: x['time_filter']):
                group_articles = {}
                group_articles['time_filterby'] = key
                group_articles['articles'] = list(gp)
                article_list_peak_list.append(len(group_articles['articles']))
                article_list_peak_items.append(group_articles)
                article_list_peak_timekey.append(key)
                print(
                    ' -- CLUSTER_POSTPROC: peak time_filterby {0}, {1} article(s)'
                    .format(key, len(group_articles['articles'])))
            print(
                ' -- CLUSTER_POSTPROC: peak time_filterby total: {0}'.format(
                    article_list_peak_list))
            peak_indexes = detect_peaks.detect_peaks(article_list_peak_list,
                                                     mph=7, mpd=2).tolist()
        else:
            peak_indexes = detect_peaks.detect_peaks(pick_list, mph=7,
                                                     mpd=2).tolist()
        for item in peak_indexes:
            peak_indexes_items.append(article_list_peak_items[int(item)])
            peak_indexes_time.append(article_list_peak_timekey[int(item)])
        print(
            ' -- CLUSTER_POSTPROC: peak time_filterby detection result: {0} \n'
            .format(peak_indexes_time))
        result_pick_data = {}
        result_pick_data['clusterid'] = str(uuid.uuid4())
        result_pick_data['topics_tfidf'] = group['theme']
        # result_pick_data['topics_lda'] = ldathemes_combined  # group['theme']
        result_pick_data['topics_lda'] = lda_theme
        result_pick_data['namedentities'] = group['namedentity']
        result_pick_data['counts_total'] = pick_list
        result_pick_data['counts_highestrank'] = sum(
            len(x) for x in pick_toparticles)
        result_pick_data['months'] = pick_timefiltered
        result_pick_data['item_highestrank'] = pick_toparticles
        result_pick_data['item_total'] = all_documents
        result_pick_data['peaks'] = peak_indexes_time
        result_pick_data['peaks_item'] = peak_indexes_items
        then = datetime.datetime.now(pytz.utc)
        timeest = str(then.astimezone(pytz.timezone('US/Eastern')))
        result_pick_data['timestamp'] = timeest
        all_results.append(result_pick_data)
        print(
            ' -* CLUSTER_POSTPROC: Postprocessing finished for the cluster: TFIDF {0} / LDA {1} \n\n'
            .format(group['theme'], lda_theme))
    # write the publish-ready JSON
    time_file = datetime.datetime.now(pytz.timezone('US/Eastern'))
    time_file_string = time_file.strftime("%Y%m%d-%H%M")
    if type == 'today':
        os.makedirs('../data_publish_ready/' + type + '/', exist_ok=True)
        final_datapath_today = ('../data_publish_ready/' + type + '/' + type +
                                '_data.json')
        with open(final_datapath_today, 'w') as f:
            json.dump(all_results, f, indent=4, sort_keys=True)
        os.makedirs('../data_publish_ready/' + type + '/', exist_ok=True)
        final_datapath_today = ('../data_publish_ready/' + type + '/' + type +
                                '_' + time_file_string + '_data.json')
        with open(final_datapath_today, 'w') as f:
            json.dump(all_results, f, indent=4, sort_keys=True)
    else:
        final_datapath = '../data_publish_ready/' + type + '/'
        with open(final_datapath + 'result.json', 'w') as f:
            json.dump(all_results, f, indent=4, sort_keys=True)

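# Hedged standalone sketch (hypothetical articles) of the hourly bucketing used in
# postprocess() for the 'today' dataset: articles are sorted by their formatted
# timestamp, grouped with itertools.groupby, and the per-bucket counts are what
# feed the peak detector.
def hourly_bucket_sketch():
    import itertools
    from operator import itemgetter

    articles = [  # hypothetical articles with '%Y-%m-%d-%H' keys
        {'title': 'a', 'time_filter': '2018-01-02-09'},
        {'title': 'b', 'time_filter': '2018-01-02-09'},
        {'title': 'c', 'time_filter': '2018-01-02-11'},
    ]
    sorted_articles = sorted(articles, key=itemgetter('time_filter'))
    counts = [
        (key, len(list(gp)))
        for key, gp in itertools.groupby(sorted_articles,
                                         key=lambda x: x['time_filter'])
    ]
    return counts  # [('2018-01-02-09', 2), ('2018-01-02-11', 1)]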