Ejemplo n.º 1
0
def train(training_list, model_path, format, use_lstm, logfile=None):
    """Train a ClinerModel on the given files and pickle it to model_path.

    Args:
        training_list: Iterable of (txt, con) pairs; each pair is passed to
            Document(txt, con).
        model_path: Path the trained model is pickled to (opened in "wb" mode).
        format: Not used by this function; kept so existing callers still work.
        use_lstm: Forwarded to the ClinerModel constructor.
        logfile: Forwarded to model.log() as its first argument.

    Returns:
        1 if training_list yields no documents; otherwise None.
    """
    # Read the data into Document objects
    docs = [Document(txt, con) for txt, con in training_list]

    # Nothing to train on: report it and signal failure to the caller.
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    if not docs:
        print('Error: Cannot train on 0 files. Terminating train.')
        return 1

    # Create a Machine Learning model
    model = ClinerModel(use_lstm)

    # Train the model using the Documents' data
    model.train(docs)

    # Pickle dump
    print('\nserializing model to %s\n' % model_path)
    with open(model_path, "wb") as m_file:
        pickle.dump(model, m_file)

    # NOTE(review): logfile defaults to None and is forwarded as-is;
    # confirm that model.log() accepts None as its output target.
    model.log(logfile, model_file=model_path)
    model.log(sys.stdout, model_file=model_path)
Ejemplo n.º 2
0
def train(training_list, model_path, format, logfile=None):
    """Train a ClinerModel on the given files and pickle it to model_path.

    Args:
        training_list: Iterable of (txt, con) pairs; each pair is passed to
            Document(txt, con).
        model_path: Path the trained model is pickled to (opened in 'wb' mode).
        format: Not used by this function; kept so existing callers still work.
        logfile: Forwarded to model.log() as its first argument.

    Raises:
        SystemExit: With exit code 1 when training_list yields no documents.
    """
    # Read the data into Document objects
    docs = [Document(txt, con) for txt, con in training_list]

    # Nothing to train on: report it and terminate.
    # sys.exit is used instead of the site-provided exit(), which is meant for
    # the interactive shell and is not guaranteed to exist in every runtime.
    if not docs:
        print('Error: Cannot train on 0 files. Terminating train.')
        sys.exit(1)

    # Create a Machine Learning model
    model = ClinerModel()

    # Train the model using the Documents' data
    model.fit_from_documents(docs)

    # Pickle dump
    print('\nserializing model to %s\n' % model_path)
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)

    # NOTE(review): logfile defaults to None and is forwarded as-is;
    # confirm that model.log() accepts None as its output target.
    model.log(logfile, model_file=model_path)
    model.log(sys.stdout, model_file=model_path)
Ejemplo n.º 3
0
def train(training_list,
          model_path,
          format,
          use_lstm,
          logfile=None,
          val=None,
          test=None):
    """Train a ClinerModel on the given files and pickle it to model_path.

    Args:
        training_list: Iterable of (txt, con) pairs used for training; each
            pair is passed to Document(txt, con).
        model_path: Path the trained model is pickled to (opened in "wb" mode).
        format: Not used by this function; kept so existing callers still work.
        use_lstm: Forwarded to the ClinerModel constructor.
        logfile: Not used by this function; kept so existing callers still work.
        val: Optional iterable of (txt, con) pairs for validation. None (the
            default) is treated as no validation files.
        test: Optional iterable of (txt, con) pairs for testing. None (the
            default) is treated as no test files.

    Returns:
        1 if training_list yields no documents; otherwise None.
    """
    # None sentinels replace the former mutable list defaults, so no list
    # object is shared between calls.
    val_pairs = [] if val is None else val
    test_pairs = [] if test is None else test

    # Read the data into Document objects
    train_docs = [Document(txt, con) for txt, con in training_list]
    val_docs = [Document(txt, con) for txt, con in val_pairs]
    test_docs = [Document(txt, con) for txt, con in test_pairs]

    # Nothing to train on: report it and signal failure to the caller
    if not train_docs:
        print('Error: Cannot train on 0 files. Terminating train.')
        return 1

    # Create a Machine Learning model
    model = ClinerModel(use_lstm)

    # Train the model using the Documents' data
    model.train(train_docs, val=val_docs, test=test_docs)

    # Pickle dump
    print('\nserializing model to %s\n' % model_path)
    with open(model_path, "wb") as m_file:
        pickle.dump(model, m_file)