Example #1
# NOTE: the original example is truncated here. The imports and the
# argument definitions below are reconstructed from the options the rest
# of the snippet actually reads; exact help strings and defaults are
# assumptions.
import argparse
import gzip
import sys

import numpy

from process_data import DataProcessor

argparser = argparse.ArgumentParser(description="Train the event autoencoder")
argparser.add_argument("--train_file", type=str, help="Training data file")
argparser.add_argument("--word_types", type=str, help="Underscore-separated word types (e.g. pred_arg0_arg1)")
argparser.add_argument("--lr", type=float, help="Learning rate", default=0.01)
argparser.add_argument("--pt_rep", type=str, help="Gzipped file containing pretrained word embeddings")
argparser.add_argument("--use_relaxation", help="Use relaxed data processing (default False)", action="store_true")
argparser.add_argument(
    "--rec_model_type", type=str, help="Reconstruction model (gaussian, multinomial)", default="gaussian"
)
args = argparser.parse_args()
pred_arg_pos = args.word_types.split("_")
learning_rate = args.lr
use_pretrained_wordrep = False
if args.pt_rep:
    use_pretrained_wordrep = True
    pt_word_rep = {
        l.split()[0]: numpy.asarray([float(f) for f in l.strip().split()[1:]])
        for l in gzip.open(args.pt_rep, "rt")
    }
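    # Assumed embedding file format (one word per line in the gzipped file):
    #   token 0.418 0.24968 -0.41242 ...
    # i.e. a token followed by whitespace-separated float components.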

dp = DataProcessor(pred_arg_pos)
x_data, y_s_data, w_ind, c_ind, w_h_map, w_oov, c_oov = dp.make_data(args.train_file, relaxed=args.use_relaxation)
rev_w_ind = {ind: word for word, ind in w_ind.items()}
rev_c_ind = {ind: concept for concept, ind in c_ind.items()}
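# The two reverse maps above decode word/concept indices back to strings.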

init_hyp_strengths = None
if args.rec_model_type == "multinomial":
    init_hyp_strengths = numpy.zeros((len(c_ind), len(w_ind)))
    for word in w_h_map:
        word_ind = w_ind.get(word, 0)
        for concept in w_h_map[word]:
            concept_ind = c_ind.get(concept, 0)
            init_hyp_strengths[concept_ind][word_ind] = 1.0
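    # Worked example: with w_h_map == {"dog": ["animal"]}, the row for
    # "animal" gets a 1.0 in the column for "dog", so init_hyp_strengths is
    # a binary concept-by-word indicator of attested hypernymy pairs
    # (index 0 serves as the OOV fallback for both axes).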

if w_oov:
    print("Regarding %d words as OOV" % len(w_oov), file=sys.stderr)
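
# Typical invocation (the script name and file paths are hypothetical):
#   python train.py --train_file train.txt --word_types pred_arg0_arg1 \
#       --lr 0.01 --rec_model_type multinomial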
Example #2
import codecs
import random
import sys

from event_ae import EventAE
from process_data import DataProcessor

sys.setrecursionlimit(10000)
num_args = 2
num_slots = num_args + 1
hyp_hidden_size = 50
learning_rate = 0.01
wc_hidden_sizes = [50] * num_slots
cc_hidden_sizes = [50] * num_args
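
# num_slots counts the predicate plus its num_args arguments; the lists
# above hold one hidden-layer size per slot for the word-concept (wc)
# networks and one per argument for the concept-concept (cc) networks.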
max_iter = 10

num_procs = int(sys.argv[2])
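# argv[1] is the training data file (read below); argv[2], used above, is
# presumably a worker-process count.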

dp = DataProcessor()
x_data, y_s_data, w_ind, c_ind, w_h_map = dp.make_data(sys.argv[1])
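
# make_data presumably returns encoded event tuples (x_data), candidate
# concept sets (y_s_data), word and concept index maps (w_ind, c_ind), and
# a word-to-hypernym map (w_h_map).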

vocab_file = codecs.open("vocab.txt", "w", "utf-8")
for w, ind in w_ind.items():
  print(w, ind, file=vocab_file)
vocab_file.close()

ont_file = codecs.open("ont.txt", "w", "utf-8")
for c, ind in c_ind.items():
  print(c, ind, file=ont_file)
ont_file.close()
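
# The two files above are read back at test time (see Example #3). A
# minimal reader sketch, assuming the "token index" format written here:
def read_index_file(path):
  index = {}
  for line in codecs.open(path, "r", "utf-8"):
    token, ind = line.rstrip("\n").rsplit(" ", 1)
    index[token] = int(ind)
  return index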

rev_w_ind = {ind: word for word, ind in w_ind.items()}
rev_c_ind = {ind: concept for concept, ind in c_ind.items()}
# Hold out a random 10% of the training pairs as a sanity-check set.
train_data = list(zip(x_data, y_s_data))
sanity_test_data = random.sample(train_data, len(train_data) // 10)
Example #3
# NOTE: the original example is truncated here. The imports and the first
# argument definitions below are reconstructed from the options the rest
# of the snippet actually reads; exact help strings and defaults are
# assumptions.
import argparse
import codecs

from process_data import DataProcessor

argparser = argparse.ArgumentParser(description="Test the event autoencoder")
argparser.add_argument('--test_file', type=str, help="Test data file")
argparser.add_argument('--word_types', type=str, help="Underscore-separated word types (e.g. pred_arg0_arg1)")
argparser.add_argument('--vocab_file', type=str, help="Word index file written at training time", default="vocab.txt")
argparser.add_argument('--ont_file', type=str, help="Concept index file written at training time", default="ont.txt")
argparser.add_argument('--use_relaxation', help="Use relaxed data processing (default False)", action='store_true')
#argparser.add_argument('--pt_rep', type=str, help="File containing pretrained embeddings")
argparser.add_argument('--use_em', help="Use EM (Default is False)", action='store_true')
argparser.set_defaults(use_em=False)
argparser.add_argument('--use_nce', help="Use NCE for estimating encoding probability. (Default is False)", action='store_true')
argparser.set_defaults(use_nce=False)
argparser.add_argument('--hyp_model_type', type=str, help="Hypernymy model (weighted_prod, linlayer, tanhlayer)", default="weighted_prod")
argparser.add_argument('--wc_pref_model_type', type=str, help="Word-concept preference model (weighted_prod, linlayer, tanhlayer)", default="tanhlayer")
argparser.add_argument('--cc_pref_model_type', type=str, help="Concept-concept preference model (weighted_prod, linlayer, tanhlayer)", default="tanhlayer")
argparser.add_argument('--rec_model_type', type=str, help="Reconstruction model (gaussian, multinomial)", default="gaussian")
argparser.add_argument('--param_iter', type=int, help="Iteration of learned param to use (default 1)", default=1)
args = argparser.parse_args()

use_relaxation = args.use_relaxation
pred_arg_pos = args.word_types.split("_")
dp = DataProcessor(pred_arg_pos)
x_data, y_s_data, w_ind, c_ind, _, _, _ = dp.make_data(args.test_file, relaxed=args.use_relaxation, handle_oov=False)

num_slots = len(pred_arg_pos)
num_args = num_slots - 1
hyp_hidden_size = 20
wc_hidden_sizes = [20] * num_slots
cc_hidden_sizes = [20] * num_args
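
# These sizes presumably have to match the values used at training time so
# that saved parameters (selected via --param_iter) load with the right
# shapes.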

#use_pretrained_wordrep = False
#if args.pt_rep:
#  print("Using pretrained word representations from %s" % args.pt_rep, file=sys.stderr)
#  use_pretrained_wordrep = True
#  pt_word_rep = {l.split()[0]: numpy.asarray([float(f) for f in l.strip().split()[1:]]) for l in gzip.open(args.pt_rep, "rt")}

train_vocab_file = codecs.open(args.vocab_file, "r", "utf-8")
train_ont_file = codecs.open(args.ont_file, "r", "utf-8")
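
# The original snippet ends here. A natural continuation (a sketch, not the
# original code) would read the two index files back, mirroring the
# "token index" format written in Example #2:
train_w_ind = {}
for line in train_vocab_file:
    w, ind = line.rstrip("\n").rsplit(" ", 1)
    train_w_ind[w] = int(ind)
train_c_ind = {}
for line in train_ont_file:
    c, ind = line.rstrip("\n").rsplit(" ", 1)
    train_c_ind[c] = int(ind)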