def main():
    """Parse CLI options, load the chosen dataset, and hand it to test_model().

    Side effects: configures root logging, shuffles the loaded data in place.
    """
    parser = argparse.ArgumentParser(
        description='Run Doc2Vec/GloVe then push those vectors into Scikit-Learn')
    parser.add_argument('-e', '--epochs', type=int, default=10,
                        help='Number of epochs for training')
    parser.add_argument('-v', '--verbose', action="store_true",
                        help="Show verbose output")
    parser.add_argument("-s", "--vecsize", type=int, default=100,
                        help="Vector size")
    parser.add_argument("-k", default=5, type=int,
                        help="K for cross-validation")
    # BUG FIX: help text for --nostop/--stem previously said 'Test data path'
    # (copy-paste error); both are boolean preprocessing switches.
    parser.add_argument('--nostop', action="store_true",
                        help='Remove stopwords during preprocessing')
    parser.add_argument('--stem', action="store_true",
                        help='Apply stemming during preprocessing')
    parser.add_argument("--dataset", default="sentiment140",
                        help="Which dataset to use")
    parser.add_argument("--datapath",
                        help="Path to chosen dataset, required for first use.")
    parser.add_argument("--dvSample", default=0.0001, type=float,
                        help="Doc2Vec sampling.")
    # BUG FIX: the following numeric options lacked type=int, so any value
    # supplied on the command line arrived as a str (the int default was
    # only used when the flag was absent) and broke downstream arithmetic.
    parser.add_argument("--dvNegative", default=5, type=int,
                        help="Doc2Vec negative.")
    parser.add_argument("--dvMinCount", default=1, type=int,
                        help="Doc2Vec min_count.")
    parser.add_argument("--window", default=1, type=int,
                        help="Doc2Vec window.")
    parser.add_argument("--dvWorkers", default=1, type=int,
                        help="Doc2Vec workers.")
    parser.add_argument("--lsi", action="store_true",
                        help="Use Latent Semantic Indexing.")
    parser.add_argument("--dataLength", default=None, type=int,
                        help="Use to limit the number of examples used")
    parser.add_argument("--dataSample", default=None, type=float,
                        help="Use to sample examples from data")
    parser.add_argument("--nTrees", default=15, type=int,
                        help="Number of trees for Random Forests.")
    parser.add_argument("--rfFeatures", default="sqrt",
                        choices=["sqrt", "log2", "auto", "all"],
                        help="Number of features for Random Forests.")
    parser.add_argument("--learningRate", default=0.05, type=float,
                        help="GloVe learning rate.")
    parser.add_argument("--pca", action="store_true",
                        help="Use pca with GloVe vectors")
    # BUG FIX: --parallelism was parsed as a string when given explicitly.
    parser.add_argument('--parallelism', '-p', action='store', default=4,
                        type=int, help='Number of parallel threads to use')
    parser.add_argument("--embeddings", choices=["glove", "doc2vec"],
                        default="doc2vec",
                        help="Methods to generate vectors from text")
    args = parser.parse_args()

    logging.basicConfig(format='%(asctime)-15s: %(message)s',
                        level=logging.INFO)

    # Materialize the (possibly lazy) loader output so it can be shuffled.
    all_data = list(loader.read(args.dataset,
                                dataPath=args.datapath,
                                limit=args.dataLength,
                                sampleRate=args.dataSample))
    random.shuffle(all_data)
    test_model(all_data, args)
def console_edit(name, value, type, description=None, world=None, frame=None):
    # Interactively view and optionally replace a named resource on the
    # console (Python 2: print statements, raw_input).
    # Returns a (changed, new_value) pair; (False, None) means "quit".
    # NOTE(review): `type` shadows the builtin; `world` is unused here —
    # presumably kept for signature compatibility with callers. Confirm.
    print "*********************************************************"
    print
    print "Editing resource", name, "of type", type
    print
    if description != None:
        print description
        print
    if frame != None:
        # Model objects are shown by name; anything else is printed raw.
        if isinstance(frame, (RigidObjectModel, RobotModelLink)):
            print "Reference frame:", frame.getName()
        else:
            print "Reference frame:", frame
        print
    print "*********************************************************"
    print "Current value:", value
    print "Do you wish to change it? (y/n/q) >",
    choice = ''
    # Loop until the first character of the input is y, n, or q.
    while choice not in ['y', 'n', 'q']:
        choice = raw_input()[0].lower()
        if choice not in ['y', 'n', 'q']:
            print "Please enter y/n/q indicating yes/no/quit."
            print ">",
    if choice == 'y':
        print "Enter the new desired value below. You may use native text,"
        print "JSON strings, or file(fn) to indicate a file name."
        print "New value >",
        data = raw_input()
        if data.startswith('{') or data.startswith('['):
            # JSON input. NOTE(review): json.loads is outside the try, so
            # malformed JSON raises instead of returning (False, value).
            jsonobj = json.loads(data)
            try:
                obj = loader.fromJson(jsonobj, type)
                return True, obj
            except Exception:
                print "Error loading from JSON, press enter to continue..."
                raw_input()
                return False, value
        elif data.startswith('file('):
            try:
                # data[5:-1] strips the "file(" prefix and ")" suffix.
                obj = get(data[5:-1], type, doedit=False)
                if obj == None:
                    return False, value
                return True, obj
            except Exception:
                print "Error loading from file, press enter to continue..."
                raw_input()
                return False, value
        else:
            # Plain text: delegate parsing to the resource loader.
            try:
                obj = loader.read(type, data)
                return True, obj
            except Exception:
                print "Error loading from text, press enter to continue..."
                raw_input()
                return False, value
    elif choice == 'n':
        print "Using current value."
        print "*********************************************************"
        return False, value
    elif choice == 'q':
        return False, None
def load_table_data():
    """Stream every (table, rows) pair from the loader into the DB client,
    then mark the client as fully loaded."""
    log.debug("Loading table data...")
    for table_name, rows in loader.read():
        log.debug(table_name)
        db_client.load(table_name, rows)
    # Flag consumers that the initial load has completed.
    db_client.data_loaded = True
def console_edit(name,value,type,description=None,world=None,frame=None):
    # Console-based resource editor (Python 2 print/raw_input). Shows the
    # current value and prompts for a replacement.
    # Returns (changed, new_value); (False, None) indicates quit.
    # NOTE(review): `type` shadows the builtin and `world` appears unused —
    # likely retained for API compatibility; confirm with callers.
    print "*********************************************************"
    print
    print "Editing resource",name,"of type",type
    print
    if description!=None:
        print description
        print
    if frame!=None:
        # Known model objects print by name, everything else verbatim.
        if isinstance(frame,(RigidObjectModel,RobotModelLink)):
            print "Reference frame:",frame.getName()
        else:
            print "Reference frame:",frame
        print
    print "*********************************************************"
    print "Current value:",value
    print "Do you wish to change it? (y/n/q) >",
    choice = ''
    # Re-prompt until the first typed character is y, n, or q.
    while choice not in ['y','n','q']:
        choice = raw_input()[0].lower()
        if choice not in ['y','n','q']:
            print "Please enter y/n/q indicating yes/no/quit."
            print ">",
    if choice=='y':
        print "Enter the new desired value below. You may use native text,"
        print "JSON strings, or file(fn) to indicate a file name."
        print "New value >",
        data = raw_input()
        if data.startswith('{') or data.startswith('['):
            # JSON path. NOTE(review): json.loads sits outside the try, so
            # malformed JSON propagates instead of returning (False, value).
            jsonobj = json.loads(data)
            try:
                obj = loader.fromJson(jsonobj,type)
                return True,obj
            except Exception:
                print "Error loading from JSON, press enter to continue..."
                raw_input()
                return False,value
        elif data.startswith('file('):
            try:
                # data[5:-1] trims "file(" and the closing ")".
                obj = get(data[5:-1],type,doedit=False)
                if obj==None:
                    return False,value
                return True,obj
            except Exception:
                print "Error loading from file, press enter to continue..."
                raw_input()
                return False,value
        else:
            # Plain-text path: let the loader parse it for this type.
            try:
                obj = loader.read(type,data)
                return True,obj
            except Exception:
                print "Error loading from text, press enter to continue..."
                raw_input()
                return False,value
    elif choice=='n':
        print "Using current value."
        print "*********************************************************"
        return False,value
    elif choice=='q':
        return False,None
def main():
    """Load the graph, print its nodes for debugging, then run the simulation."""
    g = loader.read()
    print_checks_and_debug.print_nodes(g)  # debug dump of loaded nodes
    simulate.simulate(g)
def main():
    """Command-line entry point: build the option parser, load and shuffle
    the dataset, and run test_model() on it.

    Side effects: configures root logging and shuffles data in place.
    """
    parser = argparse.ArgumentParser(
        description=
        'Run Doc2Vec/GloVe then push those vectors into Scikit-Learn')
    parser.add_argument('-e', '--epochs', type=int, default=10,
                        help='Number of epochs for training')
    parser.add_argument('-v', '--verbose', action="store_true",
                        help="Show verbose output")
    parser.add_argument("-s", "--vecsize", type=int, default=100,
                        help="Vector size")
    parser.add_argument("-k", default=5, type=int,
                        help="K for cross-validation")
    # BUG FIX: these two help strings read 'Test data path' (copy-paste
    # error); both flags are boolean preprocessing toggles.
    parser.add_argument('--nostop', action="store_true",
                        help='Remove stopwords during preprocessing')
    parser.add_argument('--stem', action="store_true",
                        help='Apply stemming during preprocessing')
    parser.add_argument("--dataset", default="sentiment140",
                        help="Which dataset to use")
    parser.add_argument("--datapath",
                        help="Path to chosen dataset, required for first use.")
    parser.add_argument("--dvSample", default=0.0001, type=float,
                        help="Doc2Vec sampling.")
    # BUG FIX: added type=int below — without it an explicitly supplied
    # value was kept as a string (argparse only uses the default's type
    # when the option is omitted), breaking downstream arithmetic.
    parser.add_argument("--dvNegative", default=5, type=int,
                        help="Doc2Vec negative.")
    parser.add_argument("--dvMinCount", default=1, type=int,
                        help="Doc2Vec min_count.")
    parser.add_argument("--window", default=1, type=int,
                        help="Doc2Vec window.")
    parser.add_argument("--dvWorkers", default=1, type=int,
                        help="Doc2Vec workers.")
    parser.add_argument("--lsi", action="store_true",
                        help="Use Latent Semantic Indexing.")
    parser.add_argument("--dataLength", default=None, type=int,
                        help="Use to limit the number of examples used")
    parser.add_argument("--dataSample", default=None, type=float,
                        help="Use to sample examples from data")
    parser.add_argument("--nTrees", default=15, type=int,
                        help="Number of trees for Random Forests.")
    parser.add_argument("--rfFeatures", default="sqrt",
                        choices=["sqrt", "log2", "auto", "all"],
                        help="Number of features for Random Forests.")
    parser.add_argument("--learningRate", default=0.05, type=float,
                        help="GloVe learning rate.")
    parser.add_argument("--pca", action="store_true",
                        help="Use pca with GloVe vectors")
    # BUG FIX: type=int so -p/--parallelism is numeric when given on the CLI.
    parser.add_argument('--parallelism', '-p', action='store', default=4,
                        type=int, help=('Number of parallel threads to use'))
    parser.add_argument("--embeddings", choices=["glove", "doc2vec"],
                        default="doc2vec",
                        help="Methods to generate vectors from text")
    args = parser.parse_args()

    logging.basicConfig(format='%(asctime)-15s: %(message)s',
                        level=logging.INFO)

    # list() materializes the loader's (possibly lazy) record stream so the
    # in-place shuffle below is possible.
    all_data = list(
        loader.read(args.dataset,
                    dataPath=args.datapath,
                    limit=args.dataLength,
                    sampleRate=args.dataSample))
    random.shuffle(all_data)
    test_model(all_data, args)
def __init__(self, rng, batchsize=100, activation=relu):
    # Build a two-level (character -> word) CNN sentence classifier in
    # Theano and compile its minibatch train/test functions.
    #   rng        : random state forwarded to every layer initializer
    #   batchsize  : minibatch size, baked into all reshape() calls below
    #   activation : nonlinearity for the word-level conv and dense layers
    import loader
    # Corpus statistics plus encoded data from the cleaned tweet file.
    # NOTE(review): assumed meanings — xchr: char-id tensor, xwrd: word-id
    # matrix, y: labels, kchr/kwrd: conv filter heights; numsent is unused
    # here. Confirm against loader.read.
    (numsent, charcnt, wordcnt, maxwordlen, maxsenlen,
     kchr, kwrd, xchr, xwrd, y) = loader.read("tweets_clean.txt")

    dimword = 30   # word-embedding dimensionality
    dimchar = 5    # char-embedding dimensionality
    clword = 300   # word-level conv output channels
    clchar = 50    # char-level conv output channels
    kword = kwrd
    kchar = kchr

    # Deterministic 90/10 train/test split (fixed seed).
    datatrainword,\
    datatestword,\
    datatrainchar,\
    datatestchar,\
    targettrain,\
    targettest\
        = train_test_split(xwrd, xchr, y, random_state=1234, test_size=0.1)

    # Wrap the splits in Theano shared variables so the compiled functions
    # below can slice them by minibatch index via `givens`.
    xtrainword = theano.shared(np.asarray(datatrainword, dtype='int16'),
                               borrow=True)
    xtrainchar = theano.shared(np.asarray(datatrainchar, dtype='int16'),
                               borrow=True)
    ytrain = theano.shared(np.asarray(targettrain, dtype='int8'),
                           borrow=True)
    xtestword = theano.shared(np.asarray(datatestword, dtype='int16'),
                              borrow=True)
    xtestchar = theano.shared(np.asarray(datatestchar, dtype='int16'),
                              borrow=True)
    ytest = theano.shared(np.asarray(targettest, dtype='int8'),
                          borrow=True)

    # NOTE(review): `/` here is integer division only under Python 2; on
    # Python 3 these become floats and would break batch iteration — confirm
    # the target interpreter.
    self.ntrainbatches = xtrainword.get_value(
        borrow=True).shape[0] / batchsize
    self.ntestbatches = xtestword.get_value(
        borrow=True).shape[0] / batchsize

    # Symbolic inputs. Note xwrd/xchr/y are REBOUND here from the loaded
    # arrays to Theano variables of matching small-int dtypes.
    index = T.iscalar()         # minibatch index
    xwrd = T.wmatrix('xwrd')    # (batch, maxsenlen) word ids
    xchr = T.wtensor3('xchr')   # (batch, maxsenlen, maxwordlen) char ids
    y = T.bvector('y')          # labels
    train = T.iscalar('train')  # 1 during training (passed to dropout)

    # --- character-level pipeline: embed chars, convolve, max-pool ---
    layercharembedinput = xchr
    layercharembed = EmbedIDLayer(rng,
                                  layercharembedinput,
                                  ninput=charcnt,
                                  noutput=dimchar)
    # One "image" per word: (batch*maxsenlen, 1, maxwordlen, dimchar).
    layer1input = layercharembed.output.reshape(
        (batchsize * maxsenlen, 1, maxwordlen, dimchar))
    layer1 = ConvolutionalLayer(rng,
                                layer1input,
                                filter_shape=(clchar, 1, kchar, dimchar),
                                image_shape=(batchsize * maxsenlen, 1,
                                             maxwordlen, dimchar))
    # Pool over all valid char positions -> one clchar vector per word.
    layer2 = MaxPoolingLayer(layer1.output,
                             poolsize=(maxwordlen - kchar + 1, 1))

    # --- word-level pipeline: word embeddings + pooled char features ---
    layerwordembedinput = xwrd
    layerwordembed = EmbedIDLayer(rng,
                                  layerwordembedinput,
                                  ninput=wordcnt,
                                  noutput=dimword)
    layer3wordinput = layerwordembed.output.reshape(
        (batchsize, 1, maxsenlen, dimword))
    layer3charinput = layer2.output.reshape(
        (batchsize, 1, maxsenlen, clchar))
    # Concatenate word and char representations along the feature axis.
    layer3input = T.concatenate([layer3wordinput, layer3charinput], axis=3)
    layer3 = ConvolutionalLayer(rng,
                                layer3input,
                                filter_shape=(clword, 1, kword,
                                              dimword + clchar),
                                image_shape=(batchsize, 1, maxsenlen,
                                             dimword + clchar),
                                activation=activation)
    # Pool over all valid sentence positions -> one clword vector/sentence.
    layer4 = MaxPoolingLayer(layer3.output,
                             poolsize=(maxsenlen - kword + 1, 1))

    # --- classifier head: two dense layers with dropout, 2 output classes ---
    layer5input = layer4.output.reshape((batchsize, clword))
    layer5 = FullyConnectedLayer(rng,
                                 dropout(rng, layer5input, train),
                                 ninput=clword,
                                 noutput=50,
                                 activation=activation)
    layer6input = layer5.output
    layer6 = FullyConnectedLayer(rng,
                                 dropout(rng, layer6input, train, p=0.1),
                                 ninput=50,
                                 noutput=2,
                                 activation=None)
    result = Result(layer6.output, y)
    loss = result.negativeloglikelihood()
    accuracy = result.accuracy()

    # All trainable parameters, jointly optimized with RMSprop.
    params = layer6.params\
        +layer5.params\
        +layer3.params\
        +layerwordembed.params\
        +layer1.params\
        +layercharembed.params
    updates = RMSprop(learningrate=0.001, params=params).updates(loss)

    # Compiled minibatch steps: `givens` substitutes slice [index*b, (index+1)*b)
    # of the shared datasets for the symbolic inputs; `train` is fixed to
    # 1 (train) or 0 (test), presumably toggling dropout.
    self.trainmodel = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        updates=updates,
        givens={
            xwrd: xtrainword[index * batchsize:(index + 1) * batchsize],
            xchr: xtrainchar[index * batchsize:(index + 1) * batchsize],
            y: ytrain[index * batchsize:(index + 1) * batchsize],
            train: np.cast['int32'](1)
        })
    self.testmodel = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        givens={
            xwrd: xtestword[index * batchsize:(index + 1) * batchsize],
            xchr: xtestchar[index * batchsize:(index + 1) * batchsize],
            y: ytest[index * batchsize:(index + 1) * batchsize],
            train: np.cast['int32'](0)
        })