Example #1
 def examples(self):
     return [
         Example(input='xuxux', semantics='{}'),
         Example(input='xuxux miami', semantics='{}'),
         Example(input='miami xuxux', semantics='{}'),
         Example(input='xuxux miami xuxux', semantics='{}'),
     ]
Example #2
def construct_examples(raw_data):
    examples = []
    for o in raw_data:
        d = o['reviews'][0]
        if None in [d['text'], d['rating']]:
            continue
        if d['title'] is None:
            d['title'] = ""

        review = d['title'] + " " + " STOP START ".join(d['text'])

        if 'gender' in o and 'birth_year' in o:
            if o['gender'] is None or o['birth_year'] is None:
                continue
            gen = map_gender[o['gender']]
            age = bucket_age(o['birth_year'], d['date'])

            if age is not None:
                meta = set()
                if gen:
                    meta.add(GENDER)
                if age:
                    meta.add(BIRTH)
                ex = Example(review, int(d['rating']) - 1, metadata=meta)

                if len(ex.get_sentence()) == 0:
                    continue
                examples.append(ex)
    return examples
Example #3
def preprocess_data(model, raw):
    lexicon = model.lexicon

    data = []
    for raw_ex in raw:
        x_str, y_str, sub_domain = raw_ex
        in_vocabulary = model.specs[sub_domain].in_vocabulary
        out_vocabulary = model.specs[sub_domain].out_vocabulary
        domain_vocabulary = model.specs[sub_domain].domain_vocabulary
        in_vocabulary_shared = model.specs['_shared_'].in_vocabulary
        out_vocabulary_shared = model.specs['_shared_'].out_vocabulary
        ex = Example(x_str,
                     y_str,
                     in_vocabulary_shared,
                     out_vocabulary_shared,
                     domain_vocabulary,
                     lexicon,
                     reverse_input=OPTIONS.reverse_input,
                     sub_domain=sub_domain)
        ex_domain = Example(x_str,
                            y_str,
                            in_vocabulary,
                            out_vocabulary,
                            domain_vocabulary,
                            lexicon,
                            reverse_input=OPTIONS.reverse_input,
                            sub_domain=sub_domain)
        data.append((ex, ex_domain))
    return data
Example #4
    def _process_buff(self, buff):
        """
    :param buff:
    :return:
    """
        len_cntr = Counter()
        for sent in buff:
            len_cntr[len(sent)] += 1
        bkts_splits = KMeans(self.n_bkts, len_cntr).splits
        # Count the sents length
        # Use k-means to splits the sents into n_bkts parts

        # reset bucket size
        # map the lenth to bkts id
        prev_size = -1
        for bkt_idx, size in enumerate(bkts_splits):
            self.buckets[bkt_idx].set_size(size)
            self.len2bkts.update(
                zip(range(prev_size + 1, size + 1),
                    [bkt_idx] * (size - prev_size)))
            prev_size = size
            # Map every length from min to max to a bucket id;
            # some of these lengths do not appear in the data set
        for sent in buff:
            # Add each sentence to its bucket according to its length:
            # construct an Example first, then push it into the bucket
            bkt_idx = self.len2bkts[len(sent)]
            example = Example(sent, self._config)
            example.convert(self.vocabs)
            # save to bucket
            idx = self.buckets[bkt_idx].add(example)
            self.id2position.append((bkt_idx, idx))
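A standalone sketch of the length-to-bucket mapping built above, assuming the k-means splits came out as [10, 20, 40]: lengths 0-10 then map to bucket 0, 11-20 to bucket 1, and 21-40 to bucket 2.

len2bkts = {}
prev_size = -1
for bkt_idx, size in enumerate([10, 20, 40]):  # assumed split points
    len2bkts.update(zip(range(prev_size + 1, size + 1),
                        [bkt_idx] * (size - prev_size)))
    prev_size = size
assert len2bkts[5] == 0 and len2bkts[15] == 1 and len2bkts[40] == 2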
Example #5
  def train(self, dataset, eta=0.1, T=[], verbose=False, dev_data=None,
            l2_reg=0.0, distract_num = 0, distract_prob=0.0,
            concat_num=1, concat_prob=0.0, augmenter=None, aug_frac=0.0):
    # train with SGD (batch size = 1)
    cur_lr = eta
    max_iters = sum(T)
    lr_changes = set([sum(T[:i]) for i in range(1, len(T))])
    for it in range(max_iters):
      t0 = time.time()
      if it in lr_changes:
        # Halve the learning rate
        cur_lr = 0.5 * cur_lr
      total_nll = 0.0
      random.shuffle(dataset)
      cur_dataset = dataset
      if augmenter:
        # Do data augmentation on the fly
        aug_num = int(round(aug_frac * len(dataset)))
        aug_exs = [Example(
            x, y, dataset[0].input_vocab, dataset[0].output_vocab, 
            dataset[0].lex, reverse_input=dataset[0].reverse_input)
            for x, y in augmenter.sample(aug_num)]
        cur_dataset = cur_dataset + aug_exs
        random.shuffle(cur_dataset)
      if concat_num > 1:
        # Generate new concatenated examples on the fly
        num_concat_exs = int(round(len(cur_dataset) * concat_prob / concat_num)) * concat_num
        normal_exs = cur_dataset[num_concat_exs:]
        concat_exs = []
        for i in range(num_concat_exs / concat_num):
          cur_exs = cur_dataset[i*concat_num:(i+1)*concat_num]
          new_x_str = (' ' + Vocabulary.END_OF_SENTENCE + ' ').join(
              ex.x_str for ex in cur_exs)
          new_y_str = (' ' + Vocabulary.END_OF_SENTENCE + ' ').join(
              ex.y_str for ex in cur_exs)
          new_ex = Example(new_x_str, new_y_str, dataset[0].input_vocab,
                           dataset[0].output_vocab, dataset[0].lex,
                           reverse_input=dataset[0].reverse_input)
          concat_exs.append(new_ex)
        cur_dataset = concat_exs + normal_exs
        random.shuffle(cur_dataset)

      for i, ex in enumerate(cur_dataset):
        if i%10==0:
          print "processed {} training samples".format(i)
        do_distract = distract_num > 0 and random.random() < distract_prob
        if do_distract:
          distractors = random.sample(dataset, distract_num)
          nll = self.sgd_step(ex, cur_lr, l2_reg, distractors=distractors)
        else:
          nll = self.sgd_step(ex, cur_lr, l2_reg)
          total_nll += nll
      dev_nll = 0.0
      if dev_data:
        for ex in dev_data:
          dev_nll += self._get_nll(ex.x_inds, ex.y_inds, ex.d_inds, ex.y_in_x_inds)
      #self.on_train_epoch(it)
      t1 = time.time()
      print 'NeuralModel.train(): iter %d (lr = %g): train obj = %g, dev nll = %g (%g seconds)' % (
          it, cur_lr, total_nll, dev_nll, t1 - t0)
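A hypothetical call sketch for train(); model, train_exs, and dev_exs are placeholders assumed to exist, and T=[20, 10, 10] encodes 40 epochs with the learning rate halved after epochs 20 and 30.

model.train(train_exs,
            eta=0.1,
            T=[20, 10, 10],   # segment lengths; the lr is halved at each boundary
            dev_data=dev_exs,
            l2_reg=1e-4,
            concat_num=2,     # occasionally concatenate pairs of examples
            concat_prob=0.3)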
Example #6
def Make_feature_file(authorIdPaperIds, dict_coauthor,
                      dict_paperIdAuthorId_to_name_aff, PaperAuthor, Author,
                      feature_function_list, to_file):
    example_list = []
    dimension = 0

    process_bar = pyprind.ProgPercent(len(authorIdPaperIds))
    for authorIdPaperId in authorIdPaperIds:
        process_bar.update()

        features = [
            feature_function(authorIdPaperId, dict_coauthor,
                             dict_paperIdAuthorId_to_name_aff, PaperAuthor,
                             Author)
            for feature_function in feature_function_list
        ]
        # merge the features
        feature = util.mergeFeatures(features)
        dimension = feature.dimension
        # target (label) for the features
        target = authorIdPaperId.label
        if target is None:
            target = "-1"
        #example
        example = Example(target, feature)
        # example.comment = json.dumps({"paperId": authorIdPaperId.paperId, "authorId": authorIdPaperId.authorId})
        example.comment = "%s %s" % (authorIdPaperId.paperId,
                                     authorIdPaperId.authorId)

        example_list.append(example)

    util.write_example_list_to_file(example_list, to_file)
    # to arff file
    util.write_example_list_to_arff_file(example_list, dimension,
                                         to_file + ".arff")
Example #7
def examples():
    # define examples and send them to the examples html template
    # examples  =  origAudioLink, origSpectroLink, dreamedAudioLink, dreamedSpectroLink
    ex1 = Example("/audio/helix_drum_track.wav", "/images/Slurm-1-.jpg",
                  "/audio/thief_44100.wav", "/images/Unknown-33.jpg")
    ex2 = Example("/audio/helix_drum_track.wav", "/images/Slurm-1-.jpg",
                  "/audio/thief_44100.wav", "/images/Unknown-33.jpg")
    examples = [ex1, ex2]
    return render_template('examples.html', examples=examples)
Example #8
 def test_examples(self):
     return [
         Example(input="minus three", semantics=('~', 3), denotation=-3),
         Example(input="three plus two",
                 semantics=('+', 3, 2),
                 denotation=5),
         Example(input="two times two plus three",
                 semantics=('+', ('*', 2, 2), 3),
                 denotation=7),
         Example(input="minus four", semantics=('~', 4), denotation=-4),
     ]
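A minimal sketch, not part of the library, of how these prefix-form semantics could be evaluated to the listed denotations:

OPS = {'+': lambda a, b: a + b, '*': lambda a, b: a * b, '~': lambda a: -a}

def evaluate(sem):
    # Numbers evaluate to themselves; tuples are (operator, operand, ...)
    if isinstance(sem, tuple):
        args = [evaluate(arg) for arg in sem[1:]]
        return OPS[sem[0]](*args)
    return sem

assert evaluate(('~', 3)) == -3
assert evaluate(('+', ('*', 2, 2), 3)) == 7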
Example #9
class TestExample(unittest.TestCase):

    def setUp(self):
        self.foo = Example(1)

    def test_Increment(self):
        self.assertEqual(self.foo.Increment(1), 2)
        self.assertEqual(self.foo.Increment(2), 4)

    def test_Decrement(self):
        self.assertEqual(self.foo.Decrement(1), 0)
        self.assertEqual(self.foo.Decrement(2), -2)
Example #10
 def change(self, file_train):
     i, j, x, y = self.extract_feature(file_train)
     all_examples = []
     for idx in range(len(i)):
         m = i[idx]
         example = Example()
         for a in m:
             if a in self.word_AlphaBet.dict:
                 example.m_word_indexes.append(self.word_AlphaBet.dict[a])
         label_list = [0, 0, 0, 0, 0]
         b = int(j[idx])
         label_list[b] = 1
         example.m_label_index = label_list
         all_examples.append(example)
     return all_examples
Example #12
def read(images, labels):
    """
    Read the digits image and label files and build the
    example objects with the feature vectors.
    :param images: Name of the text file containing the images
    :param labels: Name of the text file containing the labels
    :return: A list of Example objects
    """
    data = []
    with open(labels, 'r', encoding='utf-8') as label_file:
        for each_line in label_file:
            example = Example(int(each_line.strip()))
            data.append(example)

    image_row = 0
    count = 0
    with open(images, 'r', encoding='utf-8') as image_file:
        for each_line in image_file:
            if image_row == 0:
                feature_vector = np.zeros(NUM_FEATURES, int)
                feature_vector[0] = 1  # bias
                feature = 1
            for each_char in each_line[0:IMAGE_SIZE]:
                feature_vector[feature] = 0 if each_char == ' ' else 1
                feature += 1
            image_row = (image_row + 1) % IMAGE_SIZE
            if image_row == 0:
                data[count].fvector = feature_vector
                count += 1
    data[-1].fvector = feature_vector
    return data
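A hypothetical usage sketch; the file names are placeholders, and NUM_FEATURES is assumed to be IMAGE_SIZE * IMAGE_SIZE + 1 (one slot per pixel plus the leading bias).

digits = read('digit_images.txt', 'digit_labels.txt')  # placeholder paths
print(len(digits), digits[0].fvector[:5])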
Example #13
    def get_batch_generator(self, mode="train", single_pass=False):
        """Get a generator which is to yield a Batch Instance

        mode: can be train, eval, infer
        single_pass: if True, then this
        """

        assert mode in ["train", "eval", "infer"], "mode can be {train, eval, infer}"
        mode_data = self.video_data[self.video_data["mode"] == mode]
        video_captions = zip(mode_data["video_path"].values, mode_data["caption"].values)
        example_num = len(video_captions)
        print "mode = {mode} and the sample num is {sample_num}".format(mode=mode, sample_num=example_num)
        while True:
            random.shuffle(video_captions)
            for start, end in zip(
                    range(0, example_num, self.hparams.batch_size),
                    range(self.hparams.batch_size, example_num, self.hparams.batch_size)):
                example_list = video_captions[start: end]

                def _load_video_feat(video_path):
                    return np.load(video_path)
                example_list = map(lambda example: Example(_load_video_feat(example[0]), example[1],
                                                           vocab=self.hparams.word2id), example_list)
                yield Batch(example_list)

            if self.single_pass or single_pass:
                print "infer mode: no more data"
                break
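A hypothetical usage sketch; batcher is assumed to be an instance of the surrounding class with video_data and hparams already populated.

batch_gen = batcher.get_batch_generator(mode="train", single_pass=False)
first_batch = next(batch_gen)  # one Batch of hparams.batch_size examples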
Example #14
File: iris.py Project: micompany4/CS-156
def read(filename):
    """
    Read the Iris csv file and construct the Example objects.
    The file is assumed to contain 5 columns:
    sepal length, sepal width, petal length, petal width and training label.
    The first 4 columns will be used as features in the feature vector
    constructed.

    :param filename: Name of the csv file containing iris data
    :return: A list of Example objects
    """
    data = []
    labels = []
    with open(filename, 'r', encoding='utf-8') as label_file:
        for each_line in label_file:
            example_data = each_line.strip().split(',')
            num_features = len(example_data)
            feature_vector = np.zeros(num_features, float)
            feature_vector[0] = 1  # bias
            for count in range(num_features - 1):
                feature = float(example_data[count])
                feature_vector[count + 1] = feature
            label = example_data[num_features - 1]
            example = Example(label, feature_vector)
            data.append(example)
    return data
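A hypothetical usage sketch; 'iris.csv' is a placeholder path whose rows are assumed to look like "5.1,3.5,1.4,0.2,Iris-setosa".

iris_examples = read('iris.csv')  # placeholder file name
print(len(iris_examples))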
Example #15
def read_data(filename):

    examples = []

    for line in open(filename):

        line = line.strip().split("\t")

        topic = line[0]
        age = line[1]
        gender = line[2]
        user = line[3]
        text = line[4]

        if topic != "None":
            meta = set()
            if age == "1":
                meta.add(0)
            if gender == "f":
                meta.add(1)

            topic = int(topic)

            examples.append(Example(text, topic, meta))

    return examples
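A hypothetical usage sketch; the input file name is a placeholder, and each line is assumed to be tab-separated as topic, age, gender, user, text.

blog_examples = read_data('blog_posts.tsv')  # placeholder file name
print(len(blog_examples))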
Example #16
def preprocess_data(model, raw):
    in_vocabulary = model.in_vocabulary
    out_vocabulary = model.out_vocabulary
    domain_stats_vocab = model.domain_stats_vocab
    lexicon = model.lexicon

    data = []
    for raw_ex in raw:
        x_str, y_str, x_orig, y_orig, x_orig_same, y_orig_same, src_domain, pos, src_domain_stats = raw_ex
        d_inds = [
            domain_stats_vocab.domain_to_index[src_domain]
            for x in out_vocabulary.sentence_to_indices(y_str)
        ]
        ex = Example(x_str,
                     y_str,
                     x_orig,
                     y_orig,
                     x_orig_same,
                     y_orig_same,
                     src_domain,
                     pos,
                     in_vocabulary,
                     out_vocabulary,
                     d_inds,
                     lexicon,
                     reverse_input=OPTIONS.reverse_input)
        data.append(ex)
    return data
Example #17
def preprocess_data(model, raw):
    in_vocabulary = model.in_vocabulary
    out_vocabulary = model.out_vocabulary
    #if OPTIONS.model=='attn2hist':
    #  domain_size = model.domain_size
    #else:
    #print(len(DOMAINS))
    domain_size = len(DOMAINS)  #doma
    lexicon = model.lexicon
    #print('lexicon:',lexicon)
    #print('raw:',raw)
    #print('in_vocabulary:',in_vocabulary)
    #print('out_vocabulary:',out_vocabulary)
    data = []
    for raw_ex in raw:
        x_str, y_str, z_str = raw_ex
        ex = Example(x_str,
                     y_str,
                     z_str,
                     in_vocabulary,
                     out_vocabulary,
                     domain_size,
                     lexicon,
                     reverse_input=OPTIONS.reverse_input)
        data.append(ex)
    return data
Example #18
def gen_examples():
    """
    Generate train, dev, test examples from data.
    Extract news articles only belonging to topics 'World', 'Entertainment', 'Sports', and 'Business'.
    Retain only those examples that contain an instance of one of the top 5 most frequent named entities.
    """
    examples = []
    temp = []
    categories = ['World', 'Entertainment', 'Sports', 'Business']
    cat_label = {x: i for i, x in enumerate(categories)}
    tree_root = get_data()

    for child in tree_root:
        if child.tag == 'title' or child.tag == 'category' or child.tag == 'description':
            temp.append(child.text)
        if child.tag == 'pubdate':
            if len(temp) == 3:
                if temp[1] in categories:
                    if temp[0] is not None and temp[2] is not None:
                        X = temp[0] + " " + temp[2]
                        X_processed = unescape_chars(X)
                        Y = cat_label[temp[1]]
                        ex = Example(X_processed, Y)
                        examples.append(ex)
            temp = []
    new_examples = NER.ne_extract(examples, top=5)
    train, dev, test = train_test_split(new_examples)
    return train, dev, test
Example #19
def accept():
    if request.method == 'POST':
        print("HALP!!")
        if 'place' not in request.form:
            return "No place in the input!"
        if 'stage' not in request.form:
            return "No stage in the input!"
        place = str(request.form['place'])
        stage = str(request.form['stage'])
        if ((len(place) < 1) or (len(stage) < 1)):
            method = str(request.form['method'])
            flash("Please enter proper place and stage", "danger")
            return redirect(url_for("home"))
        # formattedString = changeRingingStringChecker(place, stage)
        # audioMaker(formattedString)
        # imageMaker(formattedString)
        print("Building example")

        formattedString = notationReader(place, stage)
        example = Example('audio/'+formattedString+'.wav', 'images/'+formattedString+'.jpg')

        methodPlayer(formattedString)
        methodDrawer(formattedString)
        return render_template('results.html', example=example, formattedString=formattedString)
    if request.method == 'GET':
        return "A get request to accept?!?"
Example #20
def run_eval():
    import csv
    assert OPTIONS.load_file is not None
    assert OPTIONS.input is not None
    train_raw = load_dataset(OPTIONS.train_data)
    random.seed(OPTIONS.model_seed)
    numpy.random.seed(OPTIONS.model_seed)
    spec = init_spec(train_raw)
    model = get_model(spec)

    reader = csv.reader(OPTIONS.input, delimiter='\t')
    writer = csv.writer(OPTIONS.output, delimiter='\t')
    header = next(reader)
    #assert header == ['id', 'input']
    writer.writerow(['id', 'input', 'output', 'score'])
    for id, input in reader:
        s = input.strip()
        example = Example(s,
                          '',
                          model.in_vocabulary,
                          model.out_vocabulary,
                          model.lexicon,
                          reverse_input=OPTIONS.reverse_input)

        deriv = decode(model, example)[0]
        output = " ".join(deriv.y_toks).strip()
        score = deriv.p
        writer.writerow([id, input, output, score])
Example #21
 def _load_examples(self, klass, example_group):
     for example in self._examples_in(klass):
         tags = example._tags
         if self._is_pending_example(example) or self._is_pending_example_group(example_group):
             example_group.append(PendingExample(
                 example, tags=tags, module=self.module))
         else:
             example_group.append(
                 Example(example, tags=tags, module=self.module))
Example #22
def make_examples(filename):
    examples = []
    with open(filename) as f:
        raw = json.load(f)
        for raw_example in raw:
            # TODO: support multiple answers
            examples.append(
                Example(input=preprocess(raw_example['text']),
                        denotation=raw_example['ans_simple']))
    return examples
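A hypothetical usage sketch; the JSON file name is a placeholder, and the file is assumed to hold a list of objects with 'text' and 'ans_simple' fields, e.g. [{"text": "who wrote hamlet", "ans_simple": "Shakespeare"}].

train_examples = make_examples('train.json')  # placeholder file name
print(len(train_examples))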
Example #23
def upload_audio():
    if request.method == 'POST':
        # check if the post request has the file part
        if 'file' not in request.files:
            flash('No file part', 'danger')
            return redirect(url_for('home'))
        file = request.files['file']

        # if the user does not select a file, the browser may also
        # submit an empty part without a filename
        if file.filename == '':
            flash('No selected file', 'danger')
            return redirect(url_for('home'))
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            filelocation = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            print(filelocation)

            #get variables from the user's form
            layer = request.form['layer']
            channel = int(request.form['channel'])
            # path_to_audio = request.form['path_to_audio']
            iterations = int(request.form['iterations'])
            octaves = int(request.form['octaves'])
            path_to_audio = "./audio/" + str(filename)
            audio_name = filename.split('.')[0]
            print("Audio name is: ")
            print(audio_name)

            print("The forms data:")
            print(layer, channel, path_to_audio, iterations, octaves)
            #run the function
            return_object = deepdream_func(layer, channel, path_to_audio,
                                           iterations, octaves, audio_name)
            if (return_object == -1):
                #return error message
                print("Showing error flash?!?")
                flash(
                    'Please select a channel that is in range for this layer',
                    'danger')
                return redirect(url_for('home'))
            else:
                print(return_object)
                #return image
                ex = Example(
                    os.path.join('/audio', return_object['audio_filename']),
                    "/images/in.jpg", return_object['audio_filename_new'],
                    "/images/out.jpg")
                return render_template('results.html', example=ex)

            # return 'upload complete'
    elif request.method == 'GET':
        return send_from_directory("uploads", "the_books.mp3")
    return
Example #24
    def examples(self):
        all_examples = []

        entities, intents = self.parse()

        for name, intent in intents.items():
            for text in intent['examples']:
                example = Example(text, name, entities)
                all_examples.append(example)

        return all_examples
Example #25
 def add_examples(self, list_of_examples):
     """
     Adds all of the provided Examples to BasicGrid.
     
     :param list_of_examples: A list of Examples.
     """
     for example_as_a_list in list_of_examples:
         if self.check_if_proper_example_coordinates(
                 coordinates=example_as_a_list[:-1]):
             self.basic_grid.add_example_to_grid(
                 example=Example(example_as_a_list))
Example #26
class TestConfiguration(unittest.TestCase):
    def setUp(self):
        self.ex = Example()

    def tearDown(self):
        del (self.ex)

    def testVariable(self):
        expected = 120
        result = self.ex.fact(5)
        self.assertEqual(expected, result)
Example #27
class TestConfiguration(unittest.TestCase):

	def setUp(self):
		self.ex = Example()

	def tearDown(self):
		del(self.ex)

	def testVariable(self):
		expected = 120
		result = self.ex.fact(5)
		self.assertEqual(expected, result)
Example #28
 def update(self, list_of_examples):
     """
     Adds the Examples to the example_queue.
     
     :param list_of_examples: A list of new Examples.
     """
     for example_as_a_list in list_of_examples:
         if self.check_if_proper_example_coordinates(
                 coordinates=example_as_a_list[:-1]):
             self.example_queue.append(
                 Example(observation=example_as_a_list))
     self.batch_update()
Example #29
 def test(self, example):
     """
     Given a list of coordinates and a class id at the last index, creates an Example object and classifies it.
     
     :param example: A list of coordinates and a class id at the last index.
     :return: Class id.
     """
     if not is_array_numeric(array=example[:-1]):
         print("Observation coordinates have to be numeric")
         return None
     example = Example(observation=example)
     return self.classify(example_coords=example.coords)
Example #30
 def addTabEdit(self, path):
     if path is not None:
         e = Example()
         highLighter = MyHighlighter(self.parent.symbolWidget, parent=e.edit.document())
         self.listofHighlighters.append(highLighter)
         try:
             with open(path, 'r') as f:
                 text = f.read() 
                 e.edit.setText(text)                       
         except Exception:
             pass          
         self.dictOfTabsEdits.addPath(path, e)
         self.addTab(e, getFileName(path)) 
         e.edit.cursorPositionChanged.connect(self.parent.setValuesOfFormat)
     else: 
         e = Example()
         highLighter = MyHighlighter(self.parent.symbolWidget, parent=e.edit.document())
         self.listofHighlighters.append(highLighter)          
         self.dictOfTabsEdits.addPath(path, e)
         self.addTab(e, getFileName(path)) 
         e.edit.cursorPositionChanged.connect(self.parent.setValuesOfFormat)        
Example #31
    def reject(self, test_example):

        u = self.u
        f = None
        e = Example()
        if type(test_example) == type(e):
            f = test_example.features_u

        else:
            f = test_example
        r = f.dot(u.T)
        return r
Example #32
    def classify(self, test_example):

        w = self.w

        f = None
        e = Example()
        if type(test_example) == type(e):
            f = test_example.features_w

        else:
            f = test_example
        h = f.dot(w.T)

        return h
Example #33
	def setUp(self):
		self.ex = Example()