コード例 #1
0
 def __init__(self, word_rep_file=None, pickled_rep_reader=None):
     """Set up the word-representation reader and derived input size.

     Args:
         word_rep_file: Passed to ``RepReader`` when no ready-made reader
             is supplied.
         pickled_rep_reader: An already constructed reader; takes
             precedence over ``word_rep_file`` when truthy.

     When neither argument is truthy, ``RepReader(elastic=True)`` is used
     (presumably an ElasticSearch-backed reader -- confirm against
     ``RepReader``).
     """
     if not pickled_rep_reader:
         if word_rep_file:
             reader = RepReader(word_rep_file)
         else:
             reader = RepReader(elastic=True)
     else:
         reader = pickled_rep_reader
     self.rep_reader = reader
     # The input size is the first entry of the reader's representation shape.
     self.input_size = reader.rep_shape[0]
     # No tagger yet; it is assigned elsewhere.
     self.tagger = None
コード例 #2
0
ファイル: nn_classifier.py プロジェクト: BMKEG/exp-parser
 def __init__(self,
              word_rep_file,
              train=False,
              cv=True,
              folds=5,
              modeltype="mlp",
              trained_model_name="trained_model.pkl",
              tagset_file="tagset.pkl"):
     """Configure the classifier for training or load a stored model.

     Args:
         word_rep_file: Word representation file handed to ``RepReader``.
         train: When true, no model is loaded and ``self.classifier`` is
             left as ``None``; otherwise a pickled model is loaded.
         cv: Stored as ``self.cv``; only affects the status message here.
         folds: Stored as ``self.folds``.
         modeltype: ``"mlp"`` selects ``hidden_sizes = [20, 10]``; any
             other value selects a single ``hidden_size = 20``. Also used
             as the prefix of the model file name.
         trained_model_name: Base name of the pickled model file; the
             actual file name is ``"<modeltype>_<trained_model_name>"``.
         tagset_file: Accepted but not used in this method.
     """
     self.trained_model_name = "%s_%s" % (modeltype, trained_model_name)
     self.cv = cv
     self.folds = folds
     self.rep_reader = RepReader(word_rep_file)
     self.input_size = self.rep_reader.rep_shape[0]
     if modeltype == "mlp":
         self.hidden_sizes = [20, 10]
     else:
         self.hidden_size = 20
     self.max_iter = 100
     self.learning_rate = 0.01
     self.tag_index = None
     self.modeltype = modeltype
     if train:
         print >> sys.stderr, "Statement classifier initialized for training."
         if self.cv:
             print >> sys.stderr, "Cross-validation will be done"
         self.classifier = None
     else:
         # Use a context manager so the model file is closed even if
         # unpickling fails.
         # NOTE(review): unpickling executes arbitrary code; only load
         # model files from trusted locations.
         with open(self.trained_model_name, "rb") as model_file:
             self.classifier = cPickle.load(model_file)
         print >> sys.stderr, "Stored model loaded. Statement classifier initialized for prediction."
コード例 #3
0
 def __init__(self, params, word_rep_file=None, pickled_rep_reader=None):
     """Store parameters and set up the word-representation reader.

     Args:
         params: Stored unchanged as ``self.params``.
         word_rep_file: Passed to ``RepReader`` when no ready-made reader
             is supplied.
         pickled_rep_reader: An already constructed reader; takes
             precedence over ``word_rep_file`` when truthy.

     Raises:
         ValueError: If neither ``word_rep_file`` nor
             ``pickled_rep_reader`` is given and no ``rep_reader``
             attribute already exists on the instance.
     """
     self.params = params
     if pickled_rep_reader:
         self.rep_reader = pickled_rep_reader
     elif word_rep_file:
         self.rep_reader = RepReader(word_rep_file)
     elif not hasattr(self, "rep_reader"):
         # Previously this path fell through and failed below with an
         # opaque AttributeError on ``self.rep_reader``; fail early with
         # a message that names the missing arguments instead.
         raise ValueError(
             "Either word_rep_file or pickled_rep_reader must be provided.")
     # The input size is the first entry of the reader's representation shape.
     self.input_size = self.rep_reader.rep_shape[0]
     self.tagger = None
コード例 #4
0
ファイル: nn_passage_tagger.py プロジェクト: jacklxc/TF_sciDT
 def __init__(self, word_rep_file=None, pickled_rep_reader=None):
     """Set up the word-representation reader, if one is available.

     Args:
         word_rep_file: Passed to ``RepReader`` when no ready-made reader
             is supplied.
         pickled_rep_reader: An already constructed reader; takes
             precedence over ``word_rep_file`` when truthy.

     When neither argument is truthy no reader is assigned and
     ``self.input_size`` falls back to ``0``.
     """
     if pickled_rep_reader:
         self.rep_reader = pickled_rep_reader
     elif word_rep_file:
         self.rep_reader = RepReader(word_rep_file)
     try:
         self.input_size = self.rep_reader.rep_shape[0]
     except (AttributeError, IndexError, TypeError):
         # No reader was assigned (AttributeError) or its shape is empty
         # or unusable (IndexError/TypeError): keep the best-effort
         # fallback, but no longer swallow KeyboardInterrupt/SystemExit
         # as the former bare ``except`` did.
         self.input_size = 0
     self.tagger = None
コード例 #5
0
    # Command-line options: input spreadsheet, its text/label columns,
    # the ElasticSearch index name and the Keras model file.
    parser.add_argument('-i', '--inFile', help='Input File')
    parser.add_argument('-t', '--textColumn', help='Name of text column')
    # Help text previously repeated the text-column description.
    parser.add_argument('-l', '--labelColumn', help='Name of label column')
    parser.add_argument('-e', '--esIndex', help='ElasticSearch Index Name')
    parser.add_argument('-m', '--modelFile', help='Keras model file')
    '''
    '''
    SIGNATURE FOR ADDING FLAGS
    add_boolean_argument(parser, 'full_text_pdf')
    '''
    args = parser.parse_args()

    base_dir = '/Users/Gully/Documents/Projects/2_active/corpora_local/intact/2018-04-17-cleanup/'
    index_name = 'oa_all_fasttext'
    model_file_name = 'i_meth_label.model.h5'
    rep_reader = RepReader(index_name=index_name, elastic=True)
    # From https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/input_fn/boston.py

    COLUMNS = ["ID", "i_meth", "p_meth", "pmid", "subfig", "text"]
    FEATURES = ["text"]
    LABEL = "p_meth"

    interaction_df = pd.read_csv(base_dir + 'ontologies/i_meth_codes.tsv',
                                 sep='\t',
                                 names=['text', 'uri', 'label'],
                                 index_col=0)
    interaction_df

    participant_df = pd.read_csv(base_dir + 'ontologies/p_meth_codes.tsv',
                                 sep='\t',
                                 names=['text', 'uri', 'label'],
コード例 #6
0
    # Positional arguments describing the input spreadsheet and test split.
    parser.add_argument('inFile', help='Input File')
    parser.add_argument('textColumn', help='Name of text column')
    # Help text previously repeated the text-column description.
    parser.add_argument('labelColumn', help='Name of label column')
    parser.add_argument('testSize', help='Size of held-out test set')
    parser.add_argument('--kerasFile', help='Keras model file')
    parser.add_argument('--esIndex',
                        help='ElasticSearch Representation Index Name')
    parser.add_argument('--repFile', help='Representation File Path')

    add_boolean_argument(parser, 'randomizeTestSet')

    args = parser.parse_args()

    # Pick the word-representation source: a representation file takes
    # precedence over an ElasticSearch index; one of the two is required.
    rep_reader = None
    if args.repFile is not None:
        rep_reader = RepReader(embedding_file=args.repFile, elastic=False)
    elif args.esIndex is not None:
        rep_reader = RepReader(index_name=args.esIndex, elastic=True)
    else:
        # The message now names the options that are actually checked
        # (repFile / esIndex) rather than kerasFile.
        raise ValueError(
            "You must specify either repFile or esIndex. Neither specified.")

    sd = SpreadsheetData(args.inFile, args.textColumn, args.labelColumn,
                         args.testSize, args.randomizeTestSet)

    # embedding matrix
    print('preparing embedding matrix...')
    words_not_found = []
    # One row per word, capped at the dataset's MAX_NB_WORDS limit.
    nb_words = min(sd.MAX_NB_WORDS, len(sd.word_index) + 1)
    # Embedding width is the first entry of the reader's representation shape.
    embed_dim = rep_reader.rep_shape[0]
    embedding_matrix = np.zeros((nb_words, embed_dim))