def gen_sentiment_vectors(reviews, word_lists, popular_words):
    """Accumulate one smoothed feature vector per sentiment label.

    Every key of ``reviews`` is visited; the entry's ``'title'`` is used as
    the document tag and its ``'reviews'`` as the sequence of lines. Lines
    equal to ``("<p>",)`` or ``("</p>",)`` are skipped. For every other
    line, ``line[0]`` is the sentiment label and ``line[1]`` the sentence.

    Features are extracted with ``f.extract_features`` (which also receives
    the label of the previous non-marker line of the same review, or
    ``None`` for the first one), folded into the running vector for that
    label with ``f.merge_features``, and finally passed through
    ``f.smooth_features``.

    Returns:
        dict mapping each sentiment label seen to its smoothed features.
    """
    paragraph_markers = (("<p>",), ("</p>",))
    vectors = {}

    for key in reviews:
        review = reviews[key]
        doc_tag = review['title']
        previous_label = None

        for line in review['reviews']:
            # Paragraph boundary markers carry no sentence to score.
            if line in paragraph_markers:
                continue

            label, text = line[0], line[1]
            extracted = f.extract_features(
                text, word_lists, popular_words, doc_tag, previous_label
            )

            if label not in vectors:
                vectors[label] = extracted
            else:
                vectors[label] = f.merge_features(vectors[label], extracted)

            previous_label = label

    # Smooth each accumulated vector once all reviews have been merged.
    return {label: f.smooth_features(vec) for label, vec in vectors.items()}
def enrichment_array_combined(sp_base, sp_dict_elutfs, cxs, func=np.average,
                              nsp=1, scores=None, exs=None):
    """Build a feature array from every species' elutions, then merge per species.

    Args:
        sp_base: forwarded to ``ppi.feature_array`` as its first argument.
        sp_dict_elutfs: mapping of species code to a list of elutions, e.g.
            ``{'Ce': [Ce_elution_1, Ce_elution_2, ...], ...}``.
        cxs: iterable of complexes; when ``exs`` is not supplied, each one is
            converted to a ``set`` and paired with its index as input to
            ``correlation_enrichment``.
        func: combining function handed to ``fe.merge_features``
            (defaults to ``np.average``).
        nsp: forwarded to ``ppi.feature_array``.
        scores: list of score names forwarded to ``ppi.feature_array``.
            ``None`` (the default) means ``["poisson"]``.
        exs: value forwarded to ``ppi.feature_array`` as its third argument;
            when falsy it is computed from ``cxs``.

    Returns:
        ``arrfeats`` of the object returned by ``ppi.feature_array`` after
        one ``fe.merge_features`` pass per key of ``sp_dict_elutfs``.
    """
    if scores is None:
        # Build the default per call: a list literal in the signature would be
        # one shared object that downstream code could mutate for every caller.
        scores = ["poisson"]

    exs = exs or correlation_enrichment([(i, set(c)) for i, c in enumerate(cxs)])

    # All species' elution lists, flattened into a single sequence.
    elutfs = ut.flatten(list(sp_dict_elutfs.values()))
    ppio = ppi.feature_array(sp_base, elutfs, exs, nsp, scores=scores,
                             extdata=[], do_filter=False)

    newarr = ppio.arrfeats
    for sp in sp_dict_elutfs:
        # NOTE(review): "<species>.*" presumably selects that species' feature
        # columns - confirm against fe.merge_features.
        newarr = fe.merge_features(newarr, "%s.*" % sp, func, False)
    return newarr