Exemple #1
0
                    filemode = 'w')
# Input corpus read by the preprocessing step.
datafile = "data.txt"

# Batch sizes (both names are kept; their consumers are outside this excerpt).
batch_size = 20
bsize = 10

# Vocabulary size and vector dimensions.
cdic_size = 5000
worddim = chardim = 50

# Remaining run settings.
window_size = 0
top_n = em_num = 1




# Word-level model: load the corpus, build the lookup tables, index the data.
data = preprocess.getwData(datafile)
tags = preprocess.tags2dic(map(lambda pair: pair[1], data))

wdic, wvectors = preprocess.words2dic2("skip_neg10_50", worddim)
# Alias, not a copy: the padding vector appended below is visible via both names.
embedding = wvectors

# The padding token takes the next free index; a random vector of length
# `worddim` is then appended so that index has a matching entry.
padding_id = len(wvectors)
wdic["<padding>"] = padding_id
wvectors.append(np.random.randn(worddim))
pids = [padding_id]

indexdata = preprocess.raw2num1(data, wdic, tags, 0, padding_id)


#record location of every word in sentence
wordloc = []
for item in data:
    wordloc.append([])
    loc = 0
    for word in item[0]:
Exemple #2
0
# Output directory, taken from the first command-line argument.
# WARNING: an existing directory at this path is deleted recursively before
# being recreated, so every run starts from an empty directory.
storedir = sys.argv[1]

if os.path.exists(storedir):
    shutil.rmtree(storedir)
os.mkdir(storedir)


dropout_pro = 0.3
em_num = 1
top_n = 1

# get boson labeled data

batch_size = 10
data = preprocess.getwData("data.txt")
tags = preprocess.tags2dic(map(lambda x: x[1], data))

# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# Only point these paths at files produced by a trusted pretraining run.
#
# Each pickle is opened in binary mode inside a `with` block so the file
# handle is closed as soon as the object has been loaded.
# The single-argument print(...) form produces the same output under the
# Python 2 print statement and the Python 3 print function.
print("loading pretrained  word vectors .......................")
with open("pretrain_neg_allmask/premodel_epoch0_half_dic", "rb") as dic_file:
    embeddic = pickle.load(dic_file)
with open("pretrain_neg_allmask/tokendic", "rb") as token_file:
    wdic = pickle.load(token_file)
print("word vectors loading is done .............vector number " + str(len(embeddic["dic_1"])))

# The padding token must already be present in the loaded token dictionary;
# a KeyError here means the pretrained dictionary does not contain it.
padding_id = wdic["<padding>"]
pids = [padding_id]


print("loading pretained model ...............................")
with open("pretrain_neg_allmask/premodel_epoch0_half_w", "rb") as model_file:
    loadedmodel = pickle.load(model_file)


indexdata = preprocess.raw2num1(data, wdic, tags, 0, padding_id)
Exemple #3
0
# Route log records at DEBUG level and above to modelrun.log; filemode 'w'
# truncates the file left by any previous run.
logging.basicConfig(
    filename='modelrun.log',
    filemode='w',
    level=logging.DEBUG,
)

# Input corpus and the model variant selected by the branch below.
datafile = "data.txt"
mtype = "word"

# Batch sizes (both names are kept; their consumers are outside this excerpt).
batch_size = 20
bsize = 10

# Vocabulary size and vector dimensions.
cdic_size = 5000
worddim = chardim = 50

# Remaining run settings.
window_size = 0
top_n = em_num = 1

# Word-level branch: build the indexed dataset and dump it into the output
# directory named by the first command-line argument.
if mtype == "word":
    # Load the corpus; the second field of every record is passed to tags2dic.
    # (Exact return shapes of the preprocess helpers are not visible here.)
    data = preprocess.getwData(datafile)
    tags = preprocess.tags2dic(map(lambda x:x[1], data))
    wdic, wvectors = preprocess.words2dic2("skip_neg10_50", worddim)
    # The padding token is registered at index len(wvectors); the random
    # vector appended two lines below lands at that same index.
    padding_id = wdic["<padding>"] = len(wvectors)
    pids = [padding_id]
    wvectors.append(np.random.randn(worddim))
    # Alias, not a copy: embedding and wvectors refer to the same object.
    embedding = wvectors
    indexdata = preprocess.raw2num1(data,wdic,tags,0,padding_id)
    indim = worddim

    # WARNING: an existing directory at sys.argv[1] is deleted recursively
    # before being recreated.
    nerstr = sys.argv[1]
    if os.path.exists(nerstr):
        shutil.rmtree(nerstr)
    os.mkdir(nerstr)
    # NOTE(review): af is not closed within the visible lines; confirm it is
    # closed later, or wrap the dump in a `with open(...)` block.
    af = open(nerstr+"/data", 'wb')
    pickle.dump(indexdata,af)