def LDA_initial(makers, date="2014-6-04 23:59:59", filters=True, show=False, no_below=5, no_above=0.75, num_topics=300): print "begin ", makers data = get_all_data(makers) data = FilterDate(data, date) data_title = data["title"].values data_description = data["description"].values print "read data of", makers title_lda = lda_parts(data_title) title_lda.dictionary_corpus(filter=filters, show=show, no_below=no_below, no_above=no_above) # need to change title_lda.LDA_model(num_topics=num_topics, save=("./model/all_title.model"), show=show, set_matrix=False) print "titile's model of ", makers, " made" description_lda = lda_parts(data_description) description_lda.dictionary_corpus(filter=filters, show=show, no_below=no_below, no_above=no_above) # need to change description_lda.LDA_model( num_topics=num_topics, save=("./model/all_description.model"), show=show, set_matrix=False ) print "description's model of ", makers, " made"
def __init__(self,makers,date="2014-6-7 23:59:59",filters = True,show=False,no_below=5, no_above=0.75,num_topics=300): self.categoryID = categoryIDtoMaker() self.makers = makers data = get_all_data(makers) data = FilterDate(data,date) print "data read" self.Auction = Auction(data,"all",filters = filters,show=show,no_below=no_below, no_above=no_above) print "model read"
def test(data_from="2014-6-04 23:59:59", data_to="2014-6-07 23:59:59"):
    """Run the price predictor on auctions ending inside a time window.

    Rows of ``get_all_data`` for a fixed list of makers are kept when their
    ``end_time`` lies strictly between ``data_from`` and ``data_to``. For
    each kept row, ``predict_by_multi.predict_by_all`` is called with its
    ``auction_id``. Successful predictions are collected as
    ``(row_position, price)`` pairs; a ``KeyError`` from the predictor is
    printed and that auction is skipped.

    NOTE(review): in the visible code neither ``true_price`` nor
    ``predict_price`` is returned or used afterwards -- confirm whether the
    rest of this function (e.g. an evaluation step) is missing.
    """
    maker_list = ['NEC', 'SONY', 'FUJITSU', 'DELL', 'TOSHIBA']
    data = get_all_data(maker_list)
    test_data = data[(data_from < data.end_time) & (data.end_time < data_to)]
    true_price = test_data.current_price.values

    # Look the id array up once instead of on every iteration.
    auction_ids = test_data.auction_id.values

    # A plain loop (not a comprehension) so one failing auction does not
    # abort the whole run.
    predict_price = []
    for i, auction_id in enumerate(auction_ids):
        try:
            price = predict_by_multi.predict_by_all(auction_id)
        except KeyError as e:
            # Single-argument form prints the same text on Python 2 and 3.
            print(e)
        else:
            predict_price.append((i, price))
def test(data_from="2014-6-04 23:59:59", data_to="2014-6-07 23:59:59"):
    """Predict prices for the auctions whose end time falls in a window.

    Loads the data for a hard-coded list of makers via ``get_all_data``,
    keeps the rows with ``data_from < end_time < data_to`` (both bounds
    exclusive), and calls ``predict_by_multi.predict_by_all`` once per
    remaining ``auction_id``. Each successful prediction is stored as
    ``(position_in_filtered_rows, price)``. When the predictor raises
    ``KeyError`` the error is printed and the auction is left out.

    NOTE(review): ``true_price`` and ``predict_price`` are built but not
    returned or consumed in the visible code -- verify against the full
    file whether a comparison step belongs here.
    """
    maker_list = ['NEC', 'SONY', 'FUJITSU', 'DELL', 'TOSHIBA']
    data = get_all_data(maker_list)
    in_window = (data_from < data.end_time) & (data.end_time < data_to)
    test_data = data[in_window]
    true_price = test_data.current_price.values

    # Hoisted: the id array does not change while iterating.
    auction_ids = test_data.auction_id.values

    predict_price = []
    for position, auction_id in enumerate(auction_ids):
        try:
            # Only the predictor call is guarded; it is the KeyError source.
            price = predict_by_multi.predict_by_all(auction_id)
        except KeyError as e:
            # Report and skip so a single bad auction does not stop the run.
            print(e)
            continue
        predict_price.append((position, price))
def LDA_initial(makers, date="2014-6-04 23:59:59", filters=True, show=False, no_below=5, no_above=0.75, num_topics=300): print "begin ", makers data = get_all_data(makers) data = FilterDate(data, date) data_title = data["title"].values data_description = data["description"].values print "read data of", makers title_lda = lda_parts(data_title) title_lda.dictionary_corpus(filter=filters, show=show, no_below=no_below, no_above=no_above) #need to change title_lda.LDA_model(num_topics=num_topics, save=("./model/all_title.model"), show=show, set_matrix=False) print "titile's model of ", makers, " made" description_lda = lda_parts(data_description) description_lda.dictionary_corpus(filter=filters, show=show, no_below=no_below, no_above=no_above) #need to change description_lda.LDA_model(num_topics=num_topics, save=("./model/all_description.model"), show=show, set_matrix=False) print "description's model of ", makers, " made"
def __init__(self, makers, date="2014-6-04 23:59:59", filters=True, show=False, no_below=5, no_above=0.75, num_topics=300): self.categoryID = categoryIDtoMaker() self.makers = makers data = get_all_data(makers) data = FilterDate(data, date) print "data read" self.Auction = Auction(data, "all", filters=filters, show=show, no_below=no_below, no_above=no_above) print "model read"
def filter_data_by_time(data_from="2014-6-04 23:59:59",
                        data_to="2014-6-07 23:59:59"):
    """Return the rows whose ``end_time`` lies strictly inside a window.

    Data is loaded with ``get_all_data`` for a fixed list of five makers,
    and only rows with ``data_from < end_time < data_to`` are returned.

    NOTE(review): the bounds are compared against ``end_time`` exactly as
    given; whether that comparison is chronological depends on the
    column's dtype -- confirm.
    """
    frame = get_all_data(['NEC', 'SONY', 'FUJITSU', 'DELL', 'TOSHIBA'])
    ends_after_start = data_from < frame.end_time
    ends_before_stop = frame.end_time < data_to
    return frame[ends_after_start & ends_before_stop]
def filter_data_by_time(data_from="2014-6-04 23:59:59",
                        data_to="2014-6-07 23:59:59"):
    """Select the rows that ended between ``data_from`` and ``data_to``.

    The rows come from ``get_all_data`` called with a hard-coded maker
    list. Both bounds are exclusive: a row is kept only when
    ``data_from < end_time`` and ``end_time < data_to``.

    NOTE(review): presumably ``end_time`` is a datetime-like column; if it
    holds plain strings the comparison is lexicographic -- verify.
    """
    makers = ['NEC', 'SONY', 'FUJITSU', 'DELL', 'TOSHIBA']
    all_rows = get_all_data(makers)
    end_times = all_rows.end_time
    in_window = (data_from < end_times) & (end_times < data_to)
    return all_rows[in_window]