filemode = 'w') batch_size = 20 cdic_size = 5000 worddim = 50 chardim = 50 datafile= "data.txt" window_size = 0 top_n = 1 em_num = 1 bsize = 10 # word model train and test data = preprocess.getwData(datafile) tags = preprocess.tags2dic(map(lambda x:x[1], data)) wdic, wvectors = preprocess.words2dic2("skip_neg10_50", worddim) padding_id = wdic["<padding>"] = len(wvectors) pids = [padding_id] wvectors.append(np.random.randn(worddim)) embedding = wvectors indexdata = preprocess.raw2num1(data,wdic,tags,0,padding_id) #record location of every word in sentence wordloc = [] for item in data: wordloc.append([]) loc = 0 for word in item[0]:
def _load_pickle(path):
    """Return the object unpickled from *path*, closing the file afterwards."""
    # NOTE(review): unpickling can execute arbitrary code -- only point this
    # at trusted, locally produced files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# The output directory is taken from the first command-line argument.  An
# existing path of that name is removed so every run starts from an empty
# directory.
storedir = sys.argv[1]
if os.path.exists(storedir):
    shutil.rmtree(storedir)
os.mkdir(storedir)

dropout_pro = 0.3
em_num = 1
top_n = 1

# get boson labeled data
batch_size = 10
data = preprocess.getwData("data.txt")
# tags2dic receives the second field of every record in ``data``; a list
# comprehension yields a real list on both Python 2 and Python 3.
tags = preprocess.tags2dic([record[1] for record in data])

print("loading pretrained word vectors .......................")
embeddic = _load_pickle("pretrain_neg_allmask/premodel_epoch0_half_dic")
wdic = _load_pickle("pretrain_neg_allmask/tokendic")
print("word vectors loading is done .............vector number "
      + str(len(embeddic["dic_1"])))

# The padding index is read from the loaded token dictionary.
padding_id = wdic["<padding>"]
pids = [padding_id]

print("loading pretained model ...............................")
loadedmodel = _load_pickle("pretrain_neg_allmask/premodel_epoch0_half_w")

indexdata = preprocess.raw2num1(data, wdic, tags, 0, padding_id)
# Route DEBUG-and-above log records to modelrun.log, opened in 'w' mode.
logging.basicConfig(
    level=logging.DEBUG,
    filename='modelrun.log',
    filemode='w'
)

# Run configuration.
batch_size = 20
cdic_size = 5000
worddim = 50
chardim = 50
datafile = "data.txt"
window_size = 0
top_n = 1
em_num = 1
mtype = "word"
bsize = 10

if mtype == "word":
    data = preprocess.getwData(datafile)
    # tags2dic receives the second field of every record in ``data``.
    tags = preprocess.tags2dic(map(lambda pair: pair[1], data))
    wdic, wvectors = preprocess.words2dic2("skip_neg10_50", worddim)

    # "<padding>" is given the index one past the current end of the vector
    # list; the random vector appended below then occupies that slot.
    padding_id = len(wvectors)
    wdic["<padding>"] = padding_id
    pids = [padding_id]
    wvectors.append(np.random.randn(worddim))
    embedding = wvectors

    indexdata = preprocess.raw2num1(data, wdic, tags, 0, padding_id)
    indim = worddim

# The output directory is taken from the first command-line argument and is
# recreated empty on every run.
nerstr = sys.argv[1]
if os.path.exists(nerstr):
    shutil.rmtree(nerstr)
os.mkdir(nerstr)

# NOTE(review): ``af`` is not closed within this fragment -- confirm that
# later code closes it.
af = open(nerstr + "/data", 'wb')
pickle.dump(indexdata, af)