def __init__(self, word_rep_file=None, pickled_rep_reader=None):
    """Attach a representation reader and derive the input size from it.

    The reader is chosen in this order:

    1. ``pickled_rep_reader`` when it is truthy (used as-is).
    2. ``RepReader(word_rep_file)`` when ``word_rep_file`` is truthy.
    3. ``RepReader(elastic=True)`` otherwise.

    ``input_size`` is the first entry of the reader's ``rep_shape`` and
    ``tagger`` starts out as ``None``.
    """
    reader = pickled_rep_reader
    if not reader:
        # No ready-made reader was handed in: build one from the file if a
        # path was given, otherwise fall back to the elastic-backed reader.
        if word_rep_file:
            reader = RepReader(word_rep_file)
        else:
            reader = RepReader(elastic=True)

    self.rep_reader = reader
    self.input_size = reader.rep_shape[0]
    self.tagger = None
def __init__(self, word_rep_file, train=False, cv=True, folds=5, modeltype="mlp",
             trained_model_name="trained_model.pkl", tagset_file="tagset.pkl"):
    """Set up the statement classifier for training or for prediction.

    Args:
        word_rep_file: Passed to ``RepReader`` to load word representations.
        train: When true, no stored model is loaded and ``classifier`` is
            left as ``None``; otherwise the stored model is unpickled.
        cv: Stored on the instance; when training, a notice is written to
            stderr if it is true.
        folds: Stored on the instance as ``self.folds``.
        modeltype: ``"mlp"`` selects ``hidden_sizes = [20, 10]``; any other
            value selects a single ``hidden_size = 20``. Also used as the
            prefix of the stored model file name.
        trained_model_name: Base name of the pickled model; the actual file
            is ``"<modeltype>_<trained_model_name>"``.
        tagset_file: Accepted for interface compatibility; not used in
            this constructor.

    Raises:
        IOError/OSError: If ``train`` is false and the stored model file
            cannot be opened.
    """
    self.trained_model_name = "%s_%s" % (modeltype, trained_model_name)
    self.cv = cv
    self.folds = folds

    self.rep_reader = RepReader(word_rep_file)
    self.input_size = self.rep_reader.rep_shape[0]

    if modeltype == "mlp":
        self.hidden_sizes = [20, 10]
    else:
        self.hidden_size = 20
    self.max_iter = 100
    self.learning_rate = 0.01
    self.tag_index = None
    self.modeltype = modeltype

    # sys.stderr.write emits the same text as the old ``print >> sys.stderr``
    # statements but behaves identically on Python 2 and Python 3.
    if train:
        sys.stderr.write("Statement classifier initialized for training.\n")
        if self.cv:
            sys.stderr.write("Cross-validation will be done\n")
        self.classifier = None
    else:
        # NOTE(review): unpickling executes arbitrary code from the file;
        # only load model files that come from a trusted source.
        # The context manager guarantees the handle is closed even if
        # unpickling fails.
        with open(self.trained_model_name, "rb") as model_file:
            self.classifier = cPickle.load(model_file)
        sys.stderr.write(
            "Stored model loaded. Statement classifier initialized for prediction.\n")
def __init__(self, params, word_rep_file=None, pickled_rep_reader=None):
    """Store ``params`` and attach a representation reader.

    A truthy ``pickled_rep_reader`` is used directly; otherwise a truthy
    ``word_rep_file`` is loaded through ``RepReader``. If neither is
    supplied no reader is assigned here, and reading ``rep_shape`` below
    fails with ``AttributeError`` unless ``rep_reader`` is already
    available on the object.

    ``input_size`` is the first entry of the reader's ``rep_shape`` and
    ``tagger`` starts out as ``None``.
    """
    self.params = params

    if not pickled_rep_reader:
        if word_rep_file:
            self.rep_reader = RepReader(word_rep_file)
    else:
        self.rep_reader = pickled_rep_reader

    shape = self.rep_reader.rep_shape
    self.input_size = shape[0]
    self.tagger = None
def __init__(self, word_rep_file=None, pickled_rep_reader=None):
    """Attach a representation reader if one is available.

    A truthy ``pickled_rep_reader`` is used directly; otherwise a truthy
    ``word_rep_file`` is loaded through ``RepReader``. When neither is
    given no reader is assigned and ``input_size`` falls back to 0.

    ``input_size`` is otherwise the first entry of the reader's
    ``rep_shape``; ``tagger`` starts out as ``None``.
    """
    if pickled_rep_reader:
        self.rep_reader = pickled_rep_reader
    elif word_rep_file:
        self.rep_reader = RepReader(word_rep_file)

    try:
        self.input_size = self.rep_reader.rep_shape[0]
    except Exception:
        # Best-effort: a missing reader or an unusable rep_shape yields a
        # size of 0. Catching Exception (rather than a bare except) keeps
        # that fallback but lets KeyboardInterrupt/SystemExit propagate.
        self.input_size = 0
    self.tagger = None
parser.add_argument('-i', '--inFile', help='Input File') parser.add_argument('-t', '--textColumn', help='Name of text column') parser.add_argument('-l', '--labelColumn', help='Name of text column') parser.add_argument('-e', '--esIndex', help='ElasticSearch Index Name') parser.add_argument('-m', '--modelFile', help='Keras model file') ''' ''' SIGNATURE FOR ADDING FLAGS add_boolean_argument(parser, 'full_text_pdf') ''' args = parser.parse_args() base_dir = '/Users/Gully/Documents/Projects/2_active/corpora_local/intact/2018-04-17-cleanup/' index_name = 'oa_all_fasttext' model_file_name = 'i_meth_label.model.h5' rep_reader = RepReader(index_name=index_name, elastic=True) # From https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/input_fn/boston.py COLUMNS = ["ID", "i_meth", "p_meth", "pmid", "subfig", "text"] FEATURES = ["text"] LABEL = "p_meth" interaction_df = pd.read_csv(base_dir + 'ontologies/i_meth_codes.tsv', sep='\t', names=['text', 'uri', 'label'], index_col=0) interaction_df participant_df = pd.read_csv(base_dir + 'ontologies/p_meth_codes.tsv', sep='\t', names=['text', 'uri', 'label'],
# Positional arguments: the spreadsheet to read and how to interpret it.
parser.add_argument('inFile', help='Input File')
parser.add_argument('textColumn', help='Name of text column')
parser.add_argument('labelColumn', help='Name of label column')
parser.add_argument('testSize', help='Size of held-out test set')
# Optional arguments: model file and the source of word representations.
parser.add_argument('--kerasFile', help='Keras model file')
parser.add_argument('--esIndex', help='ElasticSearch Representation Index Name')
parser.add_argument('--repFile', help='Representation File Path')
add_boolean_argument(parser, 'randomizeTestSet')
args = parser.parse_args()

# Word representations come from a local file when --repFile is given,
# otherwise from an ElasticSearch index when --esIndex is given.
if args.repFile is not None:
    rep_reader = RepReader(embedding_file=args.repFile, elastic=False)
elif args.esIndex is not None:
    rep_reader = RepReader(index_name=args.esIndex, elastic=True)
else:
    # The message names the two options that are actually checked above.
    raise ValueError(
        "You must specify either repFile or esIndex. Neither specified.")

sd = SpreadsheetData(args.inFile, args.textColumn, args.labelColumn,
                     args.testSize, args.randomizeTestSet)

# embedding matrix
print('preparing embedding matrix...')
words_not_found = []
# Cap the row count at MAX_NB_WORDS; the +1 allows one row beyond the
# entries of word_index (presumably index 0 is reserved -- TODO confirm).
nb_words = min(sd.MAX_NB_WORDS, len(sd.word_index) + 1)
embed_dim = rep_reader.rep_shape[0]
embedding_matrix = np.zeros((nb_words, embed_dim))