def examples(self):
    """Return four ``Example`` objects whose semantics is the string ``'{}'``.

    The inputs contain the token ``xuxux`` alone and before, after and on
    both sides of ``miami``.
    """
    inputs = (
        'xuxux',
        'xuxux miami',
        'miami xuxux',
        'xuxux miami xuxux',
    )
    return [Example(input=text, semantics='{}') for text in inputs]
def construct_examples(raw_data):
    """Build ``Example`` objects from raw user records.

    Only the first review of every record is used.  A record is skipped when:

    * the review's ``text`` or ``rating`` is ``None``;
    * the record has no ``gender`` / ``birth_year`` key, or either is ``None``;
    * ``bucket_age`` returns ``None``;
    * the resulting example has an empty sentence.

    :param raw_data: iterable of dicts with a ``reviews`` list and,
        optionally, ``gender`` and ``birth_year`` keys.
    :return: list of ``Example`` objects labelled with ``rating - 1`` and
        carrying a ``metadata`` set built from ``GENDER`` / ``BIRTH``.
    """
    examples = []
    for record in raw_data:
        review_data = record['reviews'][0]
        if review_data['text'] is None or review_data['rating'] is None:
            continue

        # NOTE: a missing title is normalised in place, i.e. the caller's
        # record is modified (kept for backward compatibility).
        if review_data['title'] is None:
            review_data['title'] = ""
        review = review_data['title'] + " " + " STOP START ".join(review_data['text'])

        # Records without usable demographic information yield no example.
        if 'gender' not in record or 'birth_year' not in record:
            continue
        if record['gender'] is None or record['birth_year'] is None:
            continue

        gen = map_gender[record['gender']]
        age = bucket_age(record['birth_year'], review_data['date'])
        if age is None:
            continue

        # Only truthy attribute values add their marker to the metadata set.
        meta = set()
        if gen:
            meta.add(GENDER)
        if age:
            meta.add(BIRTH)

        ex = Example(review, int(review_data['rating']) - 1, metadata=meta)
        if len(ex.get_sentence()) == 0:
            continue
        examples.append(ex)
    return examples
def preprocess_data(model, raw):
    """Turn raw ``(x_str, y_str, sub_domain)`` triples into pairs of examples.

    For every triple two ``Example`` objects are created: the first uses the
    input/output vocabularies of ``model.specs['_shared_']``, the second those
    of ``model.specs[sub_domain]``.  Both receive the sub-domain's
    ``domain_vocabulary`` and the model's lexicon.

    :param model: object exposing ``lexicon`` and a ``specs`` mapping.
    :param raw: iterable of ``(x_str, y_str, sub_domain)`` triples.
    :return: list of ``(shared_example, domain_example)`` tuples.
    """
    lexicon = model.lexicon
    pairs = []
    for x_str, y_str, sub_domain in raw:
        domain_spec = model.specs[sub_domain]
        domain_in = domain_spec.in_vocabulary
        domain_out = domain_spec.out_vocabulary
        domain_vocabulary = domain_spec.domain_vocabulary
        shared_in = model.specs['_shared_'].in_vocabulary
        shared_out = model.specs['_shared_'].out_vocabulary

        shared_example = Example(
            x_str, y_str, shared_in, shared_out, domain_vocabulary, lexicon,
            reverse_input=OPTIONS.reverse_input, sub_domain=sub_domain)
        domain_example = Example(
            x_str, y_str, domain_in, domain_out, domain_vocabulary, lexicon,
            reverse_input=OPTIONS.reverse_input, sub_domain=sub_domain)
        pairs.append((shared_example, domain_example))
    return pairs
def _process_buff(self, buff):
    """Distribute the sentences of ``buff`` over length-based buckets.

    The sentence lengths are counted and handed to ``KMeans`` together with
    ``self.n_bkts``; its ``splits`` are used as the bucket sizes.  Every
    length from 0 up to the last split is then mapped to a bucket index in
    ``self.len2bkts`` (including lengths that never occur in ``buff``).
    Finally each sentence is wrapped in an ``Example``, converted with
    ``self.vocabs`` and added to its bucket.

    :param buff: collection of sentences; it is iterated twice and every
        sentence must support ``len()``.
    :return: None
    """
    # Histogram of sentence lengths, clustered into n_bkts groups.
    length_counts = Counter(map(len, buff))
    splits = KMeans(self.n_bkts, length_counts).splits

    # Resize the buckets and map each length to its bucket index.
    # Presumably ``splits`` is ascending -- TODO confirm against KMeans.
    lower = 0
    for bucket_id, upper in enumerate(splits):
        self.buckets[bucket_id].set_size(upper)
        self.len2bkts.update(
            (length, bucket_id) for length in range(lower, upper + 1))
        lower = upper + 1

    # Build an example per sentence and push it into the matching bucket.
    for sent in buff:
        bucket_id = self.len2bkts[len(sent)]
        example = Example(sent, self._config)
        example.convert(self.vocabs)
        # Remember where the example ended up: (bucket index, value
        # returned by the bucket's ``add``).
        position = self.buckets[bucket_id].add(example)
        self.id2position.append((bucket_id, position))
def train(self, dataset, eta=0.1, T=None, verbose=False, dev_data=None,
          l2_reg=0.0, distract_num=0, distract_prob=0.0, concat_num=1,
          concat_prob=0.0, augmenter=None, aug_frac=0.0):
    """Train with SGD, one example per update (batch size 1).

    Runs ``sum(T)`` epochs.  The learning rate starts at ``eta`` and is
    halved each time one of the phases listed in ``T`` ends.  After every
    epoch the summed training objective and the dev NLL are printed.

    :param dataset: list of examples; shuffled in place every epoch.
    :param eta: initial learning rate.
    :param T: list with the number of epochs per learning-rate phase;
        ``None`` (the default) means no epochs.
    :param verbose: unused.
    :param dev_data: optional examples whose NLL is reported each epoch.
    :param l2_reg: value forwarded to ``self.sgd_step``.
    :param distract_num: number of distractors sampled from ``dataset``.
    :param distract_prob: probability of using distractors for a step.
    :param concat_num: how many examples are joined into one concatenated
        example (only active when greater than 1).
    :param concat_prob: fraction of the epoch's examples that is consumed by
        concatenation.
    :param augmenter: optional object whose ``sample(n)`` yields ``(x, y)``
        pairs that are added to the epoch's data.
    :param aug_frac: number of augmented examples as a fraction of
        ``len(dataset)``.
    :return: None
    """
    # Avoid a shared mutable default argument.
    if T is None:
        T = []

    cur_lr = eta
    max_iters = sum(T)
    # Epoch indices at which a new phase starts.
    lr_changes = {sum(T[:i]) for i in range(1, len(T))}

    for it in range(max_iters):
        t0 = time.time()
        if it in lr_changes:
            # Halve the learning rate
            cur_lr = 0.5 * cur_lr
        total_nll = 0.0
        random.shuffle(dataset)
        cur_dataset = dataset

        if augmenter:
            # Do data augmentation on the fly; the first example serves as
            # the template for vocabularies, lexicon and input direction.
            aug_num = int(round(aug_frac * len(dataset)))
            aug_exs = [
                Example(x, y, dataset[0].input_vocab, dataset[0].output_vocab,
                        dataset[0].lex,
                        reverse_input=dataset[0].reverse_input)
                for x, y in augmenter.sample(aug_num)]
            cur_dataset = cur_dataset + aug_exs
            random.shuffle(cur_dataset)

        if concat_num > 1:
            # Generate new concatenated examples on the fly.  The count is
            # rounded to a multiple of concat_num so the groups are full.
            num_concat_exs = int(round(
                len(cur_dataset) * concat_prob / concat_num)) * concat_num
            normal_exs = cur_dataset[num_concat_exs:]
            separator = ' ' + Vocabulary.END_OF_SENTENCE + ' '
            concat_exs = []
            # Floor division: range() needs an int on Python 3.
            for i in range(num_concat_exs // concat_num):
                cur_exs = cur_dataset[i * concat_num:(i + 1) * concat_num]
                new_x_str = separator.join(ex.x_str for ex in cur_exs)
                new_y_str = separator.join(ex.y_str for ex in cur_exs)
                new_ex = Example(new_x_str, new_y_str,
                                 dataset[0].input_vocab,
                                 dataset[0].output_vocab, dataset[0].lex,
                                 reverse_input=dataset[0].reverse_input)
                concat_exs.append(new_ex)
            cur_dataset = concat_exs + normal_exs
            random.shuffle(cur_dataset)

        for i, ex in enumerate(cur_dataset):
            if i % 10 == 0:
                # Single-argument print() works on Python 2 and 3 alike.
                print("processed {} training samples".format(i))
            do_distract = distract_num > 0 and random.random() < distract_prob
            if do_distract:
                distractors = random.sample(dataset, distract_num)
                nll = self.sgd_step(ex, cur_lr, l2_reg,
                                    distractors=distractors)
            else:
                nll = self.sgd_step(ex, cur_lr, l2_reg)
            total_nll += nll

        dev_nll = 0.0
        if dev_data:
            for ex in dev_data:
                dev_nll += self._get_nll(ex.x_inds, ex.y_inds, ex.d_inds,
                                         ex.y_in_x_inds)

        t1 = time.time()
        print('NeuralModel.train(): iter %d (lr = %g): train obj = %g, dev nll = %g (%g seconds)' % (
            it, cur_lr, total_nll, dev_nll, t1 - t0))
def Make_feature_file(authorIdPaperIds, dict_coauthor, dict_paperIdAuthorId_to_name_aff, PaperAuthor, Author, feature_function_list, to_file):
    """Compute features for every author/paper pair and write them to disk.

    Each function in ``feature_function_list`` is applied to every element of
    ``authorIdPaperIds``; the results are merged with ``util.mergeFeatures``
    and stored in an ``Example`` together with the pair's label.  The examples
    are written to ``to_file`` and, in ARFF form, to ``to_file + ".arff"``.

    :param authorIdPaperIds: sized collection of objects with ``label``,
        ``paperId`` and ``authorId`` attributes.
    :param dict_coauthor: passed through to every feature function.
    :param dict_paperIdAuthorId_to_name_aff: passed through to every feature
        function.
    :param PaperAuthor: passed through to every feature function.
    :param Author: passed through to every feature function.
    :param feature_function_list: callables producing one feature each.
    :param to_file: output path.
    """
    progress = pyprind.ProgPercent(len(authorIdPaperIds))
    rows = []
    # Stays 0 when there is no input; otherwise the last pair's dimension.
    dimension = 0
    for pair in authorIdPaperIds:
        progress.update()

        # Merge the individual features into one feature object.
        merged = util.mergeFeatures([
            extract(pair, dict_coauthor, dict_paperIdAuthorId_to_name_aff,
                    PaperAuthor, Author)
            for extract in feature_function_list
        ])
        dimension = merged.dimension

        # Target label; pairs without a label get the placeholder "-1".
        label = pair.label
        row = Example("-1" if label is None else label, merged)
        row.comment = "%s %s" % (pair.paperId, pair.authorId)
        rows.append(row)

    util.write_example_list_to_file(rows, to_file)
    # Same examples again, in ARFF format.
    util.write_example_list_to_arff_file(rows, dimension, to_file + ".arff")
def examples():
    """Render ``examples.html`` with two hard-coded examples.

    Each ``Example`` receives four paths, in this order: original audio,
    original spectrogram, dreamed audio, dreamed spectrogram.
    """
    paths = (
        "/audio/helix_drum_track.wav",
        "/images/Slurm-1-.jpg",
        "/audio/thief_44100.wav",
        "/images/Unknown-33.jpg",
    )
    # Both entries use the same four paths.
    demo_examples = [Example(*paths) for _ in range(2)]
    return render_template('examples.html', examples=demo_examples)
def test_examples(self):
    """Return four arithmetic ``Example`` objects.

    Each one carries an input phrase, its semantics as a nested tuple whose
    first element is the operator symbol, and the expected denotation.
    """
    cases = (
        ("minus three", ('~', 3), -3),
        ("three plus two", ('+', 3, 2), 5),
        ("two times two plus three", ('+', ('*', 2, 2), 3), 7),
        ("minus four", ('~', 4), -4),
    )
    return [
        Example(input=phrase, semantics=semantics, denotation=denotation)
        for phrase, semantics, denotation in cases
    ]
class TestExample(unittest.TestCase):
    """Unit tests for ``Example.Increment`` and ``Example.Decrement``."""

    def setUp(self):
        # Every test starts from a fresh instance constructed with 1.
        self.foo = Example(1)

    def test_Increment(self):
        # Calls are made in order on the same instance; the expected values
        # (2, then 4) show that the increments accumulate.
        for amount, expected in ((1, 2), (2, 4)):
            self.assertEqual(self.foo.Increment(amount), expected)

    def test_Decrement(self):
        # Same scheme as above: 1 - 1 == 0, then 0 - 2 == -2.
        for amount, expected in ((1, 0), (2, -2)):
            self.assertEqual(self.foo.Decrement(amount), expected)
def change(self, file_train):
    """Convert the features extracted from ``file_train`` into examples.

    ``self.extract_feature`` returns four values; only the first two are used
    here: a sequence of token sequences and a parallel sequence of labels.
    For every token sequence an ``Example`` is created whose
    ``m_word_indexes`` receives the alphabet index of each known token
    (unknown tokens are dropped) and whose ``m_label_index`` is a five-slot
    one-hot list.

    :param file_train: argument forwarded to ``self.extract_feature``.
    :return: list of ``Example`` objects.
    """
    token_lists, labels, _unused_x, _unused_y = self.extract_feature(file_train)
    all_examples = []
    for position, tokens in enumerate(token_lists):
        example = Example()
        # Keep only the tokens that the word alphabet knows.
        example.m_word_indexes.extend(
            self.word_AlphaBet.dict[token]
            for token in tokens
            if token in self.word_AlphaBet.dict)

        # One-hot encoding over five classes.
        one_hot = [0] * 5
        one_hot[int(labels[position])] = 1
        example.m_label_index = one_hot
        all_examples.append(example)
    return all_examples
def read(images, labels):
    """
    Read the digit image and label files and build the example objects with
    their feature vectors.

    One ``Example`` is created per line of the label file.  The image file is
    consumed in groups of ``IMAGE_SIZE`` lines; the first ``IMAGE_SIZE``
    characters of each line contribute one binary feature each (0 for a
    space, 1 for anything else).  Index 0 of every vector is a bias of 1.

    :param images: Name of the text file containing the images
    :param labels: Name of the text file containing the labels
    :return: A list of Example objects
    """
    with open(labels, 'r', encoding='utf-8') as label_file:
        data = [Example(int(line.strip())) for line in label_file]

    row_in_image = 0
    next_example = 0
    with open(images, 'r', encoding='utf-8') as image_file:
        for line in image_file:
            if row_in_image == 0:
                # First row of an image: start a new vector and rewind the
                # write position to just behind the bias slot.
                feature_vector = np.zeros(NUM_FEATURES, int)
                feature_vector[0] = 1  # bias
                feature = 1
            for pixel in line[:IMAGE_SIZE]:
                feature_vector[feature] = int(pixel != ' ')
                feature += 1
            row_in_image = (row_in_image + 1) % IMAGE_SIZE
            if row_in_image == 0:
                # Image complete: attach the vector to the next example.
                data[next_example].fvector = feature_vector
                next_example += 1

    # The most recently built vector is (re)assigned to the last example.
    data[-1].fvector = feature_vector
    return data
def get_batch_generator(self, mode="train", single_pass=False):
    """Return a generator that yields ``Batch`` instances for ``mode``.

    The rows of ``self.video_data`` whose ``mode`` column equals ``mode`` are
    shuffled and cut into batches of ``self.hparams.batch_size`` samples.
    Each sample becomes an ``Example`` built from the array loaded from its
    ``video_path`` and its ``caption``.  A trailing group smaller than the
    batch size is dropped.

    :param mode: one of ``"train"``, ``"eval"`` or ``"infer"``.
    :param single_pass: if true (or if ``self.single_pass`` is true) the
        generator stops after one pass over the data; otherwise it reshuffles
        and continues indefinitely.
    :return: generator of ``Batch`` objects.
    """
    assert mode in ["train", "eval", "infer"], "model can be {train, eval, infer}"
    mode_data = self.video_data[self.video_data["mode"] == mode]
    # Materialise the pairs: on Python 3 zip() is lazy and can neither be
    # measured with len() nor shuffled in place.
    video_captions = list(zip(mode_data["video_path"].values,
                              mode_data["caption"].values))
    example_num = len(video_captions)
    print("mode = {mode} and the sample num is {sample_num}".format(
        mode=mode, sample_num=example_num))

    batch_size = self.hparams.batch_size
    # NOTE(review): with fewer samples than batch_size and no single-pass
    # flag the loop below spins forever without yielding -- confirm callers
    # never hit this.
    while True:
        random.shuffle(video_captions)
        # The stop value includes the last start whose batch is still full,
        # so the final full batch is kept even when example_num is an exact
        # multiple of batch_size.
        for start in range(0, example_num - batch_size + 1, batch_size):
            chunk = video_captions[start:start + batch_size]
            yield Batch([
                Example(np.load(video_path), caption,
                        vocab=self.hparams.word2id)
                for video_path, caption in chunk
            ])
        if self.single_pass or single_pass:
            print("infer mode: no more data")
            break
def read(filename):
    """
    Read the Iris csv file and construct the Example objects.

    Every non-blank line is split on commas.  The last column is used as the
    training label and all preceding columns are converted to floats and
    stored in the feature vector after a leading bias of 1.  Blank lines
    (such as empty lines at the end of the file) are skipped.

    :param filename: Name of the csv file containing iris data
    :return: A list of Example objects
    """
    data = []
    with open(filename, 'r', encoding='utf-8') as csv_file:
        for each_line in csv_file:
            stripped = each_line.strip()
            if not stripped:
                # A blank line would otherwise become an example whose label
                # is the empty string.
                continue
            fields = stripped.split(',')
            feature_vector = np.zeros(len(fields), float)
            feature_vector[0] = 1  # bias
            feature_vector[1:] = [float(value) for value in fields[:-1]]
            data.append(Example(fields[-1], feature_vector))
    return data
def read_data(filename):
    """Read a tab-separated file and build one ``Example`` per usable line.

    The columns are, in order: topic, age, gender, user, text.  Lines whose
    topic is the string ``"None"`` are ignored.  The metadata set of an
    example contains ``0`` when the age column is ``"1"`` and ``1`` when the
    gender column is ``"f"``.

    :param filename: path of the file to read.
    :return: list of ``Example(text, int(topic), meta)`` objects.
    :raises IndexError: if a line has fewer than five columns.
    """
    examples = []
    # The context manager guarantees the file handle is closed.
    with open(filename) as data_file:
        for raw_line in data_file:
            fields = raw_line.strip().split("\t")
            topic = fields[0]
            age = fields[1]
            gender = fields[2]
            # fields[3] is the user column; it is not used.
            text = fields[4]
            if topic == "None":
                continue
            meta = set()
            if age == "1":
                meta.add(0)
            if gender == "f":
                meta.add(1)
            examples.append(Example(text, int(topic), meta))
    return examples
def preprocess_data(model, raw):
    """Convert raw nine-field records into ``Example`` objects.

    Each record unpacks to ``(x_str, y_str, x_orig, y_orig, x_orig_same,
    y_orig_same, src_domain, pos, src_domain_stats)``; the last field is not
    used.  For every example a list ``d_inds`` is built that repeats the
    index of ``src_domain`` once per element returned by
    ``out_vocabulary.sentence_to_indices(y_str)``.

    :param model: object exposing ``in_vocabulary``, ``out_vocabulary``,
        ``domain_stats_vocab`` and ``lexicon``.
    :param raw: iterable of nine-field records.
    :return: list of ``Example`` objects.
    """
    in_vocabulary = model.in_vocabulary
    out_vocabulary = model.out_vocabulary
    domain_stats_vocab = model.domain_stats_vocab
    lexicon = model.lexicon

    def to_example(record):
        (x_str, y_str, x_orig, y_orig, x_orig_same, y_orig_same,
         src_domain, pos, _src_domain_stats) = record
        # One domain index per output position.
        d_inds = [
            domain_stats_vocab.domain_to_index[src_domain]
            for _ in out_vocabulary.sentence_to_indices(y_str)
        ]
        return Example(x_str, y_str, x_orig, y_orig, x_orig_same,
                       y_orig_same, src_domain, pos, in_vocabulary,
                       out_vocabulary, d_inds, lexicon,
                       reverse_input=OPTIONS.reverse_input)

    return [to_example(record) for record in raw]
def preprocess_data(model, raw):
    """Convert raw ``(x_str, y_str, z_str)`` triples into ``Example`` objects.

    Every example receives the model's vocabularies and lexicon and the
    number of entries in the module-level ``DOMAINS`` as its domain size.

    :param model: object exposing ``in_vocabulary``, ``out_vocabulary`` and
        ``lexicon``.
    :param raw: iterable of ``(x_str, y_str, z_str)`` triples.
    :return: list of ``Example`` objects.
    """
    in_vocabulary = model.in_vocabulary
    out_vocabulary = model.out_vocabulary
    domain_size = len(DOMAINS)
    lexicon = model.lexicon
    return [
        Example(x_str, y_str, z_str, in_vocabulary, out_vocabulary,
                domain_size, lexicon, reverse_input=OPTIONS.reverse_input)
        for x_str, y_str, z_str in raw
    ]
def gen_examples():
    """
    Generate train, dev and test examples from the data.

    Only articles of the categories 'World', 'Entertainment', 'Sports' and
    'Business' are kept.  The resulting examples are passed through
    ``NER.ne_extract(..., top=5)`` and split with ``train_test_split``.

    :return: the ``(train, dev, test)`` triple produced by
        ``train_test_split``.
    """
    categories = ['World', 'Entertainment', 'Sports', 'Business']
    cat_label = {name: index for index, name in enumerate(categories)}
    text_tags = ('title', 'category', 'description')

    examples = []
    fields = []
    for child in get_data():
        if child.tag in text_tags:
            fields.append(child.text)
        if child.tag != 'pubdate':
            continue

        # A 'pubdate' element closes the current article.  The collected
        # texts are read as title, category, description -- this assumes the
        # elements appear in that order in the data (TODO confirm).
        if len(fields) == 3:
            title, category, description = fields
            if (category in categories
                    and title is not None
                    and description is not None):
                text = unescape_chars(title + " " + description)
                examples.append(Example(text, cat_label[category]))
        fields = []

    new_examples = NER.ne_extract(examples, top=5)
    train, dev, test = train_test_split(new_examples)
    return train, dev, test
def accept():
    """Handle the place-notation form and render the results page.

    POST: requires the form fields ``place`` and ``stage``.  If either is
    missing, a plain error string is returned; if either is empty, an error
    is flashed and the client is redirected to ``home``.  Otherwise the
    notation is parsed with ``notationReader``, audio and image are produced
    by ``methodPlayer`` / ``methodDrawer``, and ``results.html`` is rendered.
    GET: returns a fixed message.

    :return: a Flask response value; ``None`` for any other HTTP method.
    """
    if request.method == 'POST':
        print("HALP!!")
        if 'place' not in request.form:
            return "No place in the input!"
        if 'stage' not in request.form:
            return "No stage in the input!"

        place = str(request.form['place'])
        stage = str(request.form['stage'])
        if not place or not stage:
            # The 'method' field is deliberately not read here: its value
            # was never used, and a missing key would abort the request
            # before the redirect below could happen.
            flash("Please enter proper place and stage", "danger")
            return redirect(url_for("home"))

        print("Building example")
        formattedString = notationReader(place, stage)
        example = Example('audio/' + formattedString + '.wav',
                          'images/' + formattedString + '.jpg')
        methodPlayer(formattedString)
        methodDrawer(formattedString)
        return render_template('results.html', example=example,
                               formattedString=formattedString)

    if request.method == 'GET':
        return "A get request to accept?!?"
def run_eval():
    """Decode every row of ``OPTIONS.input`` and write the best derivation.

    The model is set up from ``OPTIONS.train_data`` via ``init_spec`` and
    ``get_model``.  ``OPTIONS.input`` is read as tab-separated ``id, input``
    rows after a header row; for each row the first result of ``decode`` is
    written to ``OPTIONS.output`` as ``id, input, output, score``.

    :raises AssertionError: if ``OPTIONS.load_file`` or ``OPTIONS.input`` is
        ``None``.
    """
    import csv

    assert OPTIONS.load_file is not None
    assert OPTIONS.input is not None

    train_raw = load_dataset(OPTIONS.train_data)
    random.seed(OPTIONS.model_seed)
    numpy.random.seed(OPTIONS.model_seed)
    spec = init_spec(train_raw)
    model = get_model(spec)

    reader = csv.reader(OPTIONS.input, delimiter='\t')
    writer = csv.writer(OPTIONS.output, delimiter='\t')
    # Skip the header row (its content is not validated).  The default
    # keeps an empty input from raising StopIteration.
    next(reader, None)
    writer.writerow(['id', 'input', 'output', 'score'])

    # Names chosen so the builtins id() and input() are not shadowed.
    for row_id, text in reader:
        example = Example(text.strip(), '', model.in_vocabulary,
                          model.out_vocabulary, model.lexicon,
                          reverse_input=OPTIONS.reverse_input)
        deriv = decode(model, example)[0]
        output = " ".join(deriv.y_toks).strip()
        writer.writerow([row_id, text, output, deriv.p])
def _load_examples(self, klass, example_group):
    """Wrap the examples found in ``klass`` and append them to the group.

    An example is wrapped in ``PendingExample`` when either the example
    itself or ``example_group`` is pending, and in ``Example`` otherwise.
    Both wrappers receive the example's ``_tags`` and ``self.module``.

    :param klass: object handed to ``self._examples_in``.
    :param example_group: group that receives the wrapped examples via
        ``append``.
    """
    for example in self._examples_in(klass):
        tags = example._tags
        pending = (self._is_pending_example(example)
                   or self._is_pending_example_group(example_group))
        wrapper = PendingExample if pending else Example
        example_group.append(wrapper(example, tags=tags, module=self.module))
def make_examples(filename):
    """Load a JSON file and turn each of its entries into an ``Example``.

    The input of an example is the preprocessed ``text`` entry and its
    denotation is the ``ans_simple`` entry.

    :param filename: path of a JSON file containing a list of objects.
    :return: list of ``Example`` objects.
    """
    with open(filename) as f:
        raw = json.load(f)
    # TODO: support multiple answers
    return [
        Example(input=preprocess(entry['text']),
                denotation=entry['ans_simple'])
        for entry in raw
    ]
def upload_audio():
    """Handle an audio upload and run ``deepdream_func`` on it.

    POST: validates the uploaded file, saves it under
    ``app.config['UPLOAD_FOLDER']``, reads the ``layer``, ``channel``,
    ``iterations`` and ``octaves`` form fields, calls ``deepdream_func`` and
    renders ``results.html``.  Every validation failure flashes a message and
    redirects to ``home``.
    GET: sends ``the_books.mp3`` from the ``uploads`` directory.

    :return: a Flask response; ``None`` for any other HTTP method.
    """
    if request.method == 'POST':
        # check if the post request has the file part
        if 'file' not in request.files:
            flash('No file part', 'danger')
            return redirect(url_for('home'))

        uploaded = request.files['file']
        # if the user does not select a file, the browser also
        # submits an empty part without filename
        if uploaded.filename == '':
            flash('No selected file', 'danger')
            return redirect(url_for('home'))

        if not (uploaded and allowed_file(uploaded.filename)):
            # Without this branch the view would return None for a
            # disallowed file, which Flask rejects as an invalid response.
            flash('File type not allowed', 'danger')
            return redirect(url_for('home'))

        filename = secure_filename(uploaded.filename)
        filelocation = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        uploaded.save(filelocation)
        print(filelocation)

        # get variables from the user's form
        layer = request.form['layer']
        channel = int(request.form['channel'])
        iterations = int(request.form['iterations'])
        octaves = int(request.form['octaves'])
        path_to_audio = "./audio/" + str(filename)
        audio_name = filename.split('.')[0]
        print("Audio name is: ")
        print(audio_name)
        print("The forms data:")
        print(layer, channel, path_to_audio, iterations, octaves)

        # run the function
        return_object = deepdream_func(layer, channel, path_to_audio,
                                       iterations, octaves, audio_name)
        if return_object == -1:
            # -1 is the error value this view treats as "channel out of
            # range".
            print("Showing error flash?!?")
            flash('Please select a channel that is in range for this layer',
                  'danger')
            return redirect(url_for('home'))

        print(return_object)
        ex = Example(
            os.path.join('/audio', return_object['audio_filename']),
            "/images/in.jpg",
            return_object['audio_filename_new'],
            "/images/out.jpg")
        return render_template('results.html', example=ex)

    elif request.method == 'GET':
        return send_from_directory("uploads", "the_books.mp3")

    return None
def examples(self):
    """Return one ``Example`` per example text of every parsed intent.

    ``self.parse()`` returns ``(entities, intents)``; each example is built
    from an intent's example text, the intent's name and the entities.

    :return: list of ``Example`` objects.
    """
    entities, intents = self.parse()
    return [
        Example(text, name, entities)
        for name, intent in intents.items()
        for text in intent['examples']
    ]
def add_examples(self, list_of_examples):
    """
    Adds every provided example with proper coordinates to the basic grid.

    An entry's coordinates are all of its elements except the last one.
    Entries whose coordinates fail
    ``check_if_proper_example_coordinates`` are ignored.

    :param list_of_examples: A list of examples, each given as a list.
    """
    for raw_example in list_of_examples:
        coordinates = raw_example[:-1]
        if not self.check_if_proper_example_coordinates(
                coordinates=coordinates):
            continue
        self.basic_grid.add_example_to_grid(example=Example(raw_example))
class TestConfiguration(unittest.TestCase):
    """Checks ``Example.fact``."""

    def setUp(self):
        # A fresh Example for every test.
        self.ex = Example()

    def tearDown(self):
        # Drop the instance created in setUp.
        del self.ex

    def testVariable(self):
        # fact(5) is expected to be 120.
        self.assertEqual(120, self.ex.fact(5))
class TestConfiguration(unittest.TestCase):
    """Checks ``Example.fact``."""

    def setUp(self):
        # A fresh Example for every test.
        self.ex = Example()

    def tearDown(self):
        # Drop the instance created in setUp.
        del self.ex

    def testVariable(self):
        # fact(5) is expected to be 120.
        self.assertEqual(120, self.ex.fact(5))
def update(self, list_of_examples):
    """
    Queues the provided examples and triggers a batch update.

    An entry's coordinates are all of its elements except the last one.
    Entries whose coordinates fail
    ``check_if_proper_example_coordinates`` are not queued.
    ``batch_update`` is called once after all entries were examined.

    :param list_of_examples: A list of new examples, each given as a list.
    """
    for observation in list_of_examples:
        coordinates = observation[:-1]
        if not self.check_if_proper_example_coordinates(
                coordinates=coordinates):
            continue
        self.example_queue.append(Example(observation=observation))
    self.batch_update()
def test(self, example):
    """
    Classifies an observation given as a list of coordinates followed by a
    class id at the last index.

    The coordinates (everything but the last element) must be numeric;
    otherwise a message is printed and ``None`` is returned.

    :param example: A list of coordinates and a class id at the last index.
    :return: The result of ``self.classify``, or ``None`` for non-numeric
        coordinates.
    """
    if is_array_numeric(array=example[:-1]):
        wrapped = Example(observation=example)
        return self.classify(example_coords=wrapped.coords)
    print("Observation coordinates have to be numeric")
    return None
def addTabEdit(self, path):
    """Open a new editor tab, optionally pre-filled with a file's content.

    A new ``Example`` widget is created, a ``MyHighlighter`` is attached to
    its document, and the widget is registered in ``self.dictOfTabsEdits``
    and added as a tab titled ``getFileName(path)``.  When ``path`` is not
    ``None`` the file is read into the editor; if it cannot be read the tab
    is opened empty.

    :param path: path of the file to show, or ``None`` for an empty tab.
    """
    e = Example()
    highLighter = MyHighlighter(self.parent.symbolWidget,
                                parent=e.edit.document())
    # Keep a reference to the highlighter in the list.
    self.listofHighlighters.append(highLighter)

    if path is not None:
        try:
            with open(path, 'r') as f:
                text = f.read()
        except (IOError, OSError, UnicodeError):
            # Best effort: an unreadable or undecodable file still gets an
            # (empty) tab.
            pass
        else:
            e.edit.setText(text)

    self.dictOfTabsEdits.addPath(path, e)
    self.addTab(e, getFileName(path))
    e.edit.cursorPositionChanged.connect(self.parent.setValuesOfFormat)
def reject(self, test_example):
    """Return the dot product of the example's features with ``self.u.T``.

    :param test_example: either an ``Example`` (its ``features_u`` attribute
        is used) or an object that itself provides ``dot``.
    :return: ``features.dot(self.u.T)``.
    """
    # isinstance() replaces building a throwaway Example() just to compare
    # types; it also accepts subclasses of Example.
    if isinstance(test_example, Example):
        features = test_example.features_u
    else:
        features = test_example
    return features.dot(self.u.T)
def classify(self, test_example):
    """Return the dot product of the example's features with ``self.w.T``.

    :param test_example: either an ``Example`` (its ``features_w`` attribute
        is used) or an object that itself provides ``dot``.
    :return: ``features.dot(self.w.T)``.
    """
    # isinstance() replaces building a throwaway Example() just to compare
    # types; it also accepts subclasses of Example.
    if isinstance(test_example, Example):
        features = test_example.features_w
    else:
        features = test_example
    return features.dot(self.w.T)
def setUp(self):
    """Create a fresh ``Example`` and store it on the instance as ``ex``."""
    self.ex = Example()