def match_topics_lda(x):
    """For the group of tweets x, count how many tweets fall into each (LDA topic, sentiment) pair."""
    # expects module-level globals: topics_words_lda (one word list per topic, ordered most relevant
    # first; the first word doubles as the topic label) and sentiments (maps sentiment codes to
    # label strings), plus the utils module
    import operator
    topics = topics_words_lda
    tweets_topics = {}
    for t in range(len(x.tokenized)):
        tweet = x.tokenized._values[t] + utils.get_ngrams(2, x.tokenized._values[t])
        num_words = {}
        for i in range(len(topics)):
            topic = topics[i]
            num_words.setdefault(topic[0], 0)
            #tweets_topics.setdefault(topic[0],0)
            for j in range(len(topic)):
                w = topic[j]
                if w in tweet:
                    # weight by inverse index in the topic list to give lower weight to less relevant words
                    num_words[topic[0]] += 1.0 / (j + 1)
        if all(value == 0 for value in num_words.values()):
            current_topic = "uncategorized"
        else:
            current_topic = max(num_words.items(), key=operator.itemgetter(1))[0]
        key = current_topic + '_' + sentiments[x.sentiment._values[t]]
        tweets_topics.setdefault(key, 0)
        tweets_topics[key] += 1
    #tweets_topics["uncategorized"] = len(x.tokenized) - sum(tweets_topics.values())
    return tweets_topics
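# Illustration (not part of the original code): the inverse-rank weighting used in
# match_topics_lda, run on made-up topic lists and a made-up tweet, to show how a tweet
# is assigned to the topic whose highly-ranked words it contains.
import operator

example_topics = [
    ["delay", "late", "wait", "gate"],      # the first word doubles as the topic label
    ["luggage", "bag", "lost", "claim"],
]
example_tweet = ["my", "bag", "was", "lost", "at", "the", "gate"]

scores = {}
for topic in example_topics:
    # a matched word at list index j contributes 1/(j+1) to the topic's score
    scores[topic[0]] = sum(1.0 / (j + 1) for j, w in enumerate(topic) if w in example_tweet)

# "luggage" scores 1/2 + 1/3 ~ 0.83, "delay" scores only 1/4 for "gate",
# so the example tweet is assigned to "luggage"
best_topic = max(scores.items(), key=operator.itemgetter(1))[0]
print(best_topic, scores)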
    # continuation of an if/elif chain that unpacks the vocabularies stored as the last
    # element of saved_params; earlier branches of this chain are not shown
    elif params.combination_type == "ngram-word-lstm":
        words = saved_params.pop(-1)
        words_3grams = words[0]
        words_words = words[1]

    if params.combination_type == "ngram-word":
        model = mixed_models(saved_params[0], saved_params[1], params)
    elif params.combination_type == "ngram-word-lstm":
        model = mixed_models(saved_params[0], saved_params[1], params,
                             We_initial_lstm=saved_params[2])
    # restore the trained weights into the freshly built network
    lasagne.layers.set_all_param_values(model.final_layer, saved_params)
else:
    # no saved model to load: build vocabularies and embedding matrices from the data
    # (or from a word-vector file)
    words_3grams, We_3gram = utils.get_ngrams(data, params)
    if params.random_embs:
        words_words, We_word = utils.get_words(data, params)
    else:
        words_words, We_word = utils.get_wordmap(args.wordfile)
    We_lstm = copy.deepcopy(We_word)

    if params.combination_type == "ngram-word":
        model = mixed_models(We_3gram, We_word, params)
    elif params.combination_type == "ngram-lstm":
        model = mixed_models(We_3gram, None, params, We_initial_lstm=We_lstm)
    elif params.combination_type == "word-lstm":
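# Side sketch (assumed, not from this file): the load path above relies on lasagne's
# parameter save/restore helpers; a minimal standalone version of that pattern is
#
#   import lasagne
#   l_in = lasagne.layers.InputLayer((None, 10))
#   l_out = lasagne.layers.DenseLayer(l_in, num_units=5)
#   saved = lasagne.layers.get_all_param_values(l_out)   # e.g. unpickled from a checkpoint
#   lasagne.layers.set_all_param_values(l_out, saved)    # push the arrays back into a fresh graph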
                    type=float, help="rate of scrambling")
parser.add_argument("--sp-model", help="SP model to load for evaluation")

args = parser.parse_args()

data = get_data(args)

if args.load_file is not None:
    model, epoch = load_model(data, args)
    print("Loaded model at epoch {0} and resuming training.".format(epoch))
    model.train_epochs(start_epoch=epoch)
else:
    if args.ngrams:
        vocab, vocab_fr = utils.get_ngrams(data, args.share_vocab, n=args.ngrams)
    else:
        vocab, vocab_fr = utils.get_words(data, args.share_vocab)

    if args.model == "avg":
        model = Averaging(data, args, vocab, vocab_fr)
    elif args.model == "lstm":
        model = LSTM(data, args, vocab, vocab_fr)

    print(" ".join(sys.argv))
    print("Num examples:", len(data))
    print("Num words:", len(vocab))
    if vocab_fr is not None:
        print("Num fr words:", len(vocab_fr))
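# Assumed argument definitions (hypothetical spellings, not copied from this script) for the
# attributes read above; the real flags, types, defaults, and help strings may differ:
#
#   parser.add_argument("--load-file", dest="load_file", help="checkpoint to resume training from")
#   parser.add_argument("--ngrams", type=int, default=0, help="character n-gram order (0 = use words)")
#   parser.add_argument("--share-vocab", dest="share_vocab", type=int, default=1,
#                       help="share the vocabulary between the two languages")
#   parser.add_argument("--model", default="avg", choices=["avg", "lstm"])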