def _process_buff(self, buff):
        """Bucket the sentences in buff by length and convert them to Examples.

        :param buff: a list of sentences
        :return: None; the buckets and id2position are filled in place
        """
        # Count how many sentences there are of each length
        len_cntr = Counter()
        for sent in buff:
            len_cntr[len(sent)] += 1
        # Use k-means to split the sentence lengths into n_bkts groups
        bkts_splits = KMeans(self.n_bkts, len_cntr).splits

        # Reset the bucket sizes and map each length to a bucket id
        prev_size = -1
        for bkt_idx, size in enumerate(bkts_splits):
            self.buckets[bkt_idx].set_size(size)
            self.len2bkts.update(
                zip(range(prev_size + 1, size + 1),
                    [bkt_idx] * (size - prev_size)))
            prev_size = size
            # Every length from the minimum up to each split point gets a
            # bucket id, even lengths that never occur in the data set
        for sent in buff:
            # Convert each sentence into an Example, then push it into the
            # bucket that matches its length
            bkt_idx = self.len2bkts[len(sent)]
            example = Example(sent, self._config)
            example.convert(self.vocabs)
            idx = self.buckets[bkt_idx].add(example)
            self.id2position.append((bkt_idx, idx))
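
The zip trick in the loop above is easy to miss: every length up to each split point is mapped onto a bucket id. A minimal, self-contained sketch, assuming hypothetical k-means split points [3, 7, 12]:

len2bkts = {}
prev_size = -1
for bkt_idx, size in enumerate([3, 7, 12]):   # hypothetical split points
    len2bkts.update(
        zip(range(prev_size + 1, size + 1), [bkt_idx] * (size - prev_size)))
    prev_size = size

print(len2bkts[2])   # 0 -> lengths 0..3 land in bucket 0
print(len2bkts[5])   # 1 -> lengths 4..7 land in bucket 1
print(len2bkts[12])  # 2 -> lengths 8..12 land in bucket 2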
Example #2
def construct_examples(raw_data):
    examples = []
    for o in raw_data:
        d = o['reviews'][0]
        if None in [d['text'], d['rating']]:
            continue
        if d['title'] is None:
            d['title'] = ""

        review = d['title'] + " " + " STOP START ".join(d['text'])

        if 'gender' in o and 'birth_year' in o:
            if o['gender'] is None or o['birth_year'] is None:
                continue
            gen = map_gender[o['gender']]
            age = bucket_age(o['birth_year'], d['date'])

            if age is not None:

                meta = set()
                if gen:
                    meta.add(GENDER)
                if age:
                    meta.add(BIRTH)
                ex = Example(review, int(d['rating']) - 1, metadata=meta)

                if len(ex.get_sentence()) == 0:
                    continue
                examples.append(ex)
    return examples
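
A minimal sketch of the review string built above, assuming d['text'] is a list of sentences (the names here are illustrative):

title = "Great phone"
sentences = ["Battery lasts all day.", "Camera is sharp."]
review = title + " " + " STOP START ".join(sentences)
print(review)
# Great phone Battery lasts all day. STOP START Camera is sharp.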
Example #3
def read_data(filename):

    examples = []

    for line in open(filename):

        line = line.strip().split("\t")

        topic = line[0]
        age = line[1]
        gender = line[2]
        user = line[3]
        text = line[4]

        if topic != "None":
            meta = set()
            if age == "1":
                meta.add(0)
            if gender == "f":
                meta.add(1)

            topic = int(topic)

            examples.append(Example(text, topic, meta))

    return examples
Example #4
def preprocess_data(model, raw):
    in_vocabulary = model.in_vocabulary
    out_vocabulary = model.out_vocabulary
    domain_stats_vocab = model.domain_stats_vocab
    lexicon = model.lexicon

    data = []
    for raw_ex in raw:
        x_str, y_str, x_orig, y_orig, x_orig_same, y_orig_same, src_domain, pos, src_domain_stats = raw_ex
        d_inds = [
            domain_stats_vocab.domain_to_index[src_domain]
            for x in out_vocabulary.sentence_to_indices(y_str)
        ]
        ex = Example(x_str,
                     y_str,
                     x_orig,
                     y_orig,
                     x_orig_same,
                     y_orig_same,
                     src_domain,
                     pos,
                     in_vocabulary,
                     out_vocabulary,
                     d_inds,
                     lexicon,
                     reverse_input=OPTIONS.reverse_input)
        data.append(ex)
    return data
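
The d_inds comprehension above repeats the source-domain index once per output token; the loop variable x is deliberately unused. A self-contained sketch with hypothetical vocabularies:

domain_to_index = {'geo': 0, 'atis': 1}  # hypothetical domain vocabulary
y_indices = [4, 9, 9, 2]                 # hypothetical output token indices
src_domain = 'atis'
d_inds = [domain_to_index[src_domain] for x in y_indices]
print(d_inds)  # [1, 1, 1, 1]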
Example #5
def Make_feature_file(authorIdPaperIds, dict_coauthor,
                      dict_paperIdAuthorId_to_name_aff, PaperAuthor, Author,
                      feature_function_list, to_file):
    example_list = []
    dimension = 0

    process_bar = pyprind.ProgPercent(len(authorIdPaperIds))
    for authorIdPaperId in authorIdPaperIds:
        process_bar.update()

        features = [
            feature_function(authorIdPaperId, dict_coauthor,
                             dict_paperIdAuthorId_to_name_aff, PaperAuthor,
                             Author)
            for feature_function in feature_function_list
        ]
        # Merge the individual features into a single feature vector
        feature = util.mergeFeatures(features)
        dimension = feature.dimension
        # Target label for this example
        target = authorIdPaperId.label
        if target is None:
            target = "-1"
        #example
        example = Example(target, feature)
        # example.comment = json.dumps({"paperId": authorIdPaperId.paperId, "authorId": authorIdPaperId.authorId})
        example.comment = "%s %s" % (authorIdPaperId.paperId,
                                     authorIdPaperId.authorId)

        example_list.append(example)

    util.write_example_list_to_file(example_list, to_file)
    # to arff file
    util.write_example_list_to_arff_file(example_list, dimension,
                                         to_file + ".arff")
Example #6
def preprocess_data(model, raw):
    in_vocabulary = model.in_vocabulary
    out_vocabulary = model.out_vocabulary
    domain_size = len(DOMAINS)
    lexicon = model.lexicon
    data = []
    for raw_ex in raw:
        x_str, y_str, z_str = raw_ex
        ex = Example(x_str,
                     y_str,
                     z_str,
                     in_vocabulary,
                     out_vocabulary,
                     domain_size,
                     lexicon,
                     reverse_input=OPTIONS.reverse_input)
        data.append(ex)
    return data
Example #7
def read(filename):
    """
    Read the Iris csv file and construct the Example objects.
    The file is assumed to contain 5 columns:
    sepal length, sepal width, petal length, petal width and training label.
    The first 4 columns will be used as features in the feature vector
    constructed.

    :param filename: Name of the csv file containing iris data
    :return: A list of Example objects
    """
    data = []
    with open(filename, 'r', encoding='utf-8') as label_file:
        for each_line in label_file:
            example_data = each_line.strip().split(',')
            num_features = len(example_data)
            feature_vector = np.zeros(num_features, float)
            feature_vector[0] = 1  # bias
            for count in range(num_features - 1):
                feature = float(example_data[count])
                feature_vector[count + 1] = feature
            label = example_data[num_features - 1]
            example = Example(label, feature_vector)
            data.append(example)
    return data
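
A minimal, runnable sketch of the bias-prepended feature vector built above, using one hypothetical iris row:

import numpy as np

row = "5.1,3.5,1.4,0.2,Iris-setosa".split(',')
feature_vector = np.zeros(len(row), float)
feature_vector[0] = 1  # bias
for count in range(len(row) - 1):
    feature_vector[count + 1] = float(row[count])
label = row[-1]
print(label, feature_vector)  # Iris-setosa [1.  5.1 3.5 1.4 0.2]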
Example #8
def accept():
    if request.method == 'POST':
        print("HALP!!")
        if 'place' not in request.form:
            return "No place in the input!"
        if 'stage' not in request.form:
            return "No stage in the input!"
        place = str(request.form['place'])
        stage = str(request.form['stage'])
        if len(place) < 1 or len(stage) < 1:
            flash("Please enter proper place and stage", "danger")
            return redirect(url_for("home"))
        # formattedString = changeRingingStringChecker(place, stage)
        # audioMaker(formattedString)
        # imageMaker(formattedString)
        print("Building example")

        formattedString = notationReader(place, stage)
        example = Example('audio/'+formattedString+'.wav', 'images/'+formattedString+'.jpg')

        methodPlayer(formattedString)
        methodDrawer(formattedString)
        return render_template('results.html', example=example, formattedString=formattedString)
    if request.method == 'GET':
        return "A get request to accept?!?"
Example #9
    def get_batch_generator(self, mode="train", single_pass=False):
        """Get a generator which yields Batch instances.

        mode: can be train, eval, infer
        single_pass: if True, iterate over the data only once
        """

        assert mode in ["train", "eval", "infer"], "mode can be {train, eval, infer}"
        mode_data = self.video_data[self.video_data["mode"] == mode]
        video_captions = list(zip(mode_data["video_path"].values, mode_data["caption"].values))
        example_num = len(video_captions)
        print("mode = {mode} and the sample num is {sample_num}".format(mode=mode, sample_num=example_num))
        while True:
            random.shuffle(video_captions)
            for start, end in zip(
                    range(0, example_num, self.hparams.batch_size),
                    range(self.hparams.batch_size, example_num, self.hparams.batch_size)):
                example_list = video_captions[start: end]

                def _load_video_feat(video_path):
                    return np.load(video_path)

                example_list = [Example(_load_video_feat(video_path), caption,
                                        vocab=self.hparams.word2id)
                                for video_path, caption in example_list]
                yield Batch(example_list)

            if self.single_pass or single_pass:
                print("infer mode: no more data")
                break
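
Note that pairing the two staggered ranges above silently drops the last partial batch. A self-contained sketch of the pattern:

items = list(range(10))
batch_size = 3
for start, end in zip(range(0, len(items), batch_size),
                      range(batch_size, len(items), batch_size)):
    print(items[start:end])
# [0, 1, 2] [3, 4, 5] [6, 7, 8] -- item 9 is dropped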
Example #10
def read(images, labels):
    """
    Read the digits image and label files and build the
    example objects with the feature vectors.
    :param images: Name of the text file containing the images
    :param labels: Name of the text file containing the labels
    :return: A list of Example objects
    """
    data = []
    with open(labels, 'r', encoding='utf-8') as label_file:
        for each_line in label_file:
            example = Example(int(each_line.strip()))
            data.append(example)

    image_row = 0
    count = 0
    with open(images, 'r', encoding='utf-8') as image_file:
        for each_line in image_file:
            if image_row == 0:
                feature_vector = np.zeros(NUM_FEATURES, int)
                feature_vector[0] = 1  # bias
                feature = 1
            for each_char in each_line[0:IMAGE_SIZE]:
                feature_vector[feature] = 0 if each_char == ' ' else 1
                feature += 1
            image_row = (image_row + 1) % IMAGE_SIZE
            if image_row == 0:
                data[count].fvector = feature_vector
                count += 1
    data[-1].fvector = feature_vector
    return data
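
A minimal sketch of the row-flattening loop above, shrunk to hypothetical 3x3 ASCII images where a space is off and anything else is on:

import numpy as np

IMAGE_SIZE = 3
rows = ["## ", " # ", " ##"]          # one hypothetical image
feature_vector = np.zeros(1 + IMAGE_SIZE * IMAGE_SIZE, int)
feature_vector[0] = 1  # bias
feature = 1
for each_line in rows:
    for each_char in each_line[0:IMAGE_SIZE]:
        feature_vector[feature] = 0 if each_char == ' ' else 1
        feature += 1
print(feature_vector)  # [1 1 1 0 0 1 0 0 1 1]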
Example #11
def run_eval():
    import csv
    assert OPTIONS.load_file is not None
    assert OPTIONS.input is not None
    train_raw = load_dataset(OPTIONS.train_data)
    random.seed(OPTIONS.model_seed)
    numpy.random.seed(OPTIONS.model_seed)
    spec = init_spec(train_raw)
    model = get_model(spec)

    reader = csv.reader(OPTIONS.input, delimiter='\t')
    writer = csv.writer(OPTIONS.output, delimiter='\t')
    header = next(reader)
    #assert header == ['id', 'input']
    writer.writerow(['id', 'input', 'output', 'score'])
    for id, input in reader:
        s = input.strip()
        example = Example(s,
                          '',
                          model.in_vocabulary,
                          model.out_vocabulary,
                          model.lexicon,
                          reverse_input=OPTIONS.reverse_input)

        deriv = decode(model, example)[0]
        output = " ".join(deriv.y_toks).strip()
        score = deriv.p
        writer.writerow([id, input, output, score])
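
A self-contained sketch of the tab-delimited read/write pattern above, substituting io.StringIO for OPTIONS.input and OPTIONS.output:

import csv
import io

src = io.StringIO("id\tinput\n1\thello world\n")
dst = io.StringIO()
reader = csv.reader(src, delimiter='\t')
writer = csv.writer(dst, delimiter='\t')
header = next(reader)  # skip the header row
writer.writerow(['id', 'input', 'output', 'score'])
for id, input in reader:
    writer.writerow([id, input.strip(), '<output>', 0.0])
print(dst.getvalue())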
Example #12
    def _load_examples(self, klass, example_group):
        for example in self._examples_in(klass):
            tags = example._tags
            if self._is_pending_example(example) or self._is_pending_example_group(example_group):
                example_group.append(PendingExample(
                    example, tags=tags, module=self.module))
            else:
                example_group.append(
                    Example(example, tags=tags, module=self.module))
Example #13
def make_examples(filename):
    examples = []
    with open(filename) as f:
        raw = json.load(f)
        for raw_example in raw:
            # TODO: support multiple answers
            examples.append(
                Example(input=preprocess(raw_example['text']),
                        denotation=raw_example['ans_simple']))
    return examples
Example #14
def upload_audio():
    if request.method == 'POST':
        # check if the post request has the file part
        if 'file' not in request.files:
            flash('No file part', 'danger')
            return redirect(url_for('home'))
        file = request.files['file']

        # if the user does not select a file, the browser may also
        # submit an empty part without a filename
        if file.filename == '':
            flash('No selected file', 'danger')
            return redirect(url_for('home'))
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            filelocation = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            print(filelocation)

            #get variables from the user's form
            layer = request.form['layer']
            channel = int(request.form['channel'])
            # path_to_audio = request.form['path_to_audio']
            iterations = int(request.form['iterations'])
            octaves = int(request.form['octaves'])
            path_to_audio = "./audio/" + str(filename)
            audio_name = filename.split('.')[0]
            print("Audio name is: ")
            print(audio_name)

            print("The forms data:")
            print(layer, channel, path_to_audio, iterations, octaves)
            #run the function
            return_object = deepdream_func(layer, channel, path_to_audio,
                                           iterations, octaves, audio_name)
            if return_object == -1:
                #return error message
                print("Showing error flash?!?")
                flash(
                    'Please select a channel that is in range for this layer',
                    'danger')
                return redirect(url_for('home'))
            else:
                print(return_object)
                #return image
                ex = Example(
                    os.path.join('/audio', return_object['audio_filename']),
                    "/images/in.jpg", return_object['audio_filename_new'],
                    "/images/out.jpg")
                return render_template('results.html', example=ex)

            # return 'upload complete'
    elif request.method == 'GET':
        return send_from_directory("uploads", "the_books.mp3")
    return
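
The upload handler above leans on secure_filename to sanitize whatever name the browser sends. A small sketch of what it guards against (assuming Werkzeug is installed):

from werkzeug.utils import secure_filename

print(secure_filename("../../etc/passwd.mp3"))  # etc_passwd.mp3
print(secure_filename("my song.mp3"))           # my_song.mp3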
Example #15
    def examples(self):
        all_examples = []

        entities, intents = self.parse()

        for name, intent in intents.items():
            for text in intent['examples']:
                example = Example(text, name, entities)
                all_examples.append(example)

        return all_examples
Example #16
    def add_examples(self, list_of_examples):
        """
        Adds all of the provided Examples to BasicGrid.

        :param list_of_examples: A list of Examples.
        """
        for example_as_a_list in list_of_examples:
            if self.check_if_proper_example_coordinates(
                    coordinates=example_as_a_list[:-1]):
                self.basic_grid.add_example_to_grid(
                    example=Example(example_as_a_list))
Example #17
    def update(self, list_of_examples):
        """
        Adds the Examples to the example_queue.

        :param list_of_examples: A list of new Examples.
        """
        for example_as_a_list in list_of_examples:
            if self.check_if_proper_example_coordinates(
                    coordinates=example_as_a_list[:-1]):
                self.example_queue.append(
                    Example(observation=example_as_a_list))
        self.batch_update()
Example #18
    def test(self, example):
        """
        Given a list of coordinates and a class id at the last index,
        creates an Example object and classifies it.

        :param example: A list of coordinates with a class id at the last index.
        :return: Class id.
        """
        if not is_array_numeric(array=example[:-1]):
            print("Observation coordinates have to be numeric")
            return None
        example = Example(observation=example)
        return self.classify(example_coords=example.coords)
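
All three grid methods above share one convention: an example is a plain list whose leading entries are coordinates and whose last entry is the class id. A minimal sketch:

observation = [1.5, 2.0, 0]                       # x, y, then class id
coords, class_id = observation[:-1], observation[-1]
print(coords, class_id)                           # [1.5, 2.0] 0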
Example #19
    def addTabEdit(self, path):
        # Both branches built the same tab; only the file read depends on path
        e = Example()
        highLighter = MyHighlighter(self.parent.symbolWidget, parent=e.edit.document())
        self.listofHighlighters.append(highLighter)
        if path is not None:
            try:
                with open(path, 'r') as f:
                    e.edit.setText(f.read())
            except Exception:
                pass
        self.dictOfTabsEdits.addPath(path, e)
        self.addTab(e, getFileName(path))
        e.edit.cursorPositionChanged.connect(self.parent.setValuesOfFormat)
Example #20
    def reject(self, test_example):

        u = self.u
        if isinstance(test_example, Example):
            f = test_example.features_u
        else:
            f = test_example
        r = f.dot(u.T)
        return r
Example #21
def run_shell(model):
  print('==== Neural Network Semantic Parsing REPL ====')
  print('')
  print('Enter an utterance:')
  while True:
    s = input('> ').strip()
    example = Example(s, '', '', {}, model.in_vocabulary, model.out_vocabulary, reverse_input=OPTIONS.reverse_input)
    print('')
    print('Result:')
    preds = decode(model, example)
    for prob, y_toks in preds[:10]:
      y_str = ' '.join(y_toks)
      print('  [p=%f] %s' % (prob, y_str))
    print('')
Example #22
    def classify(self, test_example):

        w = self.w
        if isinstance(test_example, Example):
            f = test_example.features_w
        else:
            f = test_example
        h = f.dot(w.T)

        return h
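
Both reject and classify above reduce to one dense dot product against a stored weight matrix. A minimal numpy sketch with hypothetical weights:

import numpy as np

w = np.array([[0.2, -0.1, 0.4]])  # hypothetical 1 x d weight matrix
f = np.array([1.0, 2.0, 3.0])     # d-dimensional feature vector
print(f.dot(w.T))                 # [1.2]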
Example #23
    def change(self, insts):
        exams = []
        for inst in insts:
            example = Example()
            for w in inst.words:
                if w in self.word_AlphaBet.list:
                    example.wordIndexs.append(self.word_AlphaBet.dict[w])
                else:
                    example.wordIndexs.append(self.hyperpara.unknow_id)
            for l in inst.labels:
                labelId = self.label_AlphaBet.dict[l]
                example.labelIndexs.append(labelId)
            exams.append(example)
        return exams  # the word and label ids for each sentence
Example #24
  def post_query():
    query = bottle.request.params.get('query')
    print('Received query: "%s"' % query)
    example = Example(query, '', '', {}, model.in_vocabulary, model.out_vocabulary, reverse_input=OPTIONS.reverse_input)
    preds = decode(model, example)
    lines = ['<b>Query: "%s"</b>' % query, '<ul>']
    for i, deriv in enumerate(preds[:10]):
      y_str = ' '.join(deriv.y_toks)
      lines.append('<li> %d. [p=%f] %s' % (i, deriv.p, y_str))
      lines.append(make_heatmap(query, y_str, deriv.attention_list, deriv.copy_list))
    lines.append('</ul>')

    content = '\n'.join(lines)
    return bottle.template('main', prompt='Enter a new query', content=content)
Example #25
def get_from(folder):
    
    neg_file = "{}/neg_examples".format(folder)
    pos_file = "{}/pos_examples".format(folder)
    
    examples = []
    
    sys.stderr.write("  Loading negative examples...\n")
    for line in open(neg_file):
        line = line.strip()
        if line:
            examples.append(Example(line, 0))
        if len(examples) > 1000:
            break
    sys.stderr.write("  Done.\n")
    sys.stderr.write("  Loading positive examples...\n")
    for line in open(pos_file):
        line = line.strip()
        if line:
            examples.append(Example(line, 1))
        if len(examples) > 2000:
            break
    sys.stderr.write("  Done.\n")
    return examples
Example #26
    def change(self, file_train):
        i, j, x, y = self.extract_feature(file_train)
        all_examples = []
        for idx in range(len(i)):
            m = i[idx]
            example = Example()
            for a in m:
                if a in self.word_AlphaBet.dict:
                    example.m_word_indexes.append(self.word_AlphaBet.dict[a])
            label_list = [0, 0, 0, 0, 0]
            b = int(j[idx])
            label_list[b] = 1
            example.m_label_index = label_list
            all_examples.append(example)
        return all_examples
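
A minimal sketch of the one-hot label list built above, assuming five classes:

def one_hot(index, num_classes=5):
    label_list = [0] * num_classes
    label_list[index] = 1
    return label_list

print(one_hot(2))  # [0, 0, 1, 0, 0]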
Example #27
def preprocess_data(model, raw):
  in_vocabulary = model.in_vocabulary
  out_vocabulary = model.out_vocabulary
  lexicon = model.lexicon
  data = []
  for raw_ex in raw:
    x_str, y_str = raw_ex
    ex = Example(x_str, y_str, in_vocabulary, out_vocabulary, lexicon,
                 reverse_input=OPTIONS.reverse_input)
    data.append(ex)
  return data
Example #28
    def examples(self):
        class_labels = self._readmap("image_class_labels")
        images = self._readmap("images")
        train_test_split = self._readmap("train_test_split")

        results = []

        for id in images:
            results.append(
                Example(id=int(id),
                        path=images[id][:-4],
                        species=int(class_labels[id]),
                        is_training=int(train_test_split[id]) == 1,
                        datadir=self.path))

        return results
Example #29
def get_dataset(k=4):
    
    keys = ["source", "url", "title", "image", "category", "description", "rank", "pubdate"]
    filename = "../datasets/newsspace200.xml"
    
    xml_tree = ET.parse(filename)
    root = xml_tree.getroot()
    
    categories = ["World", "Entertainment", "Sports", "Business"]
    # "Top Stories", "Sci/Tech", "Top News", "Europe", "Health", "Italia", "U.S."]
    label_map = dict(zip(categories, range(len(categories))))
    
    #sources = ["Yahoo Business", "Reuters Business", "Washington Post Business", "BBC News Business"]
    #source_map = dict(zip(sources, range(len(sources))))
    
    
    examples = []
    
    i = 0
    d = []
    for c in root:
        assert c.tag == keys[i % len(keys)]
        d.append(c.text)
        if len(d) == len(keys):
            if d[4] in label_map:
                description = d[2]
                if d[5] is not None:
                    description += "  " + d[5]
                ex = Example(preprocess(description), 
                             label = label_map[d[4]],
                             #metadata = [source_map[d[0]]])
                             )
                examples.append(ex)
            d = []
        i += 1

    examples = ner.tags_NE(examples, "ag_corpus", k=k)
    
    random.shuffle(examples)
    
    l = len(examples) // 10
    test, dev, train = examples[:l], examples[l:2*l], examples[2*l:]

    return train, dev, test
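
A self-contained sketch of the shuffle-then-slice split at the end of get_dataset: the first tenth becomes test, the second tenth dev, the rest train.

import random

examples = list(range(100))
random.shuffle(examples)
l = len(examples) // 10
test, dev, train = examples[:l], examples[l:2 * l], examples[2 * l:]
print(len(test), len(dev), len(train))  # 10 10 80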
Example #30
def preprocess_data(model, raw):
    in_vocabulary = model.in_vocabulary
    out_vocabulary = model.out_vocabulary
    domain_vocabulary = model.domain_vocabulary
    lexicon = model.lexicon

    data = []
    for raw_ex in raw:
        x_str, y_str, sub_domain = raw_ex
        ex = Example(x_str,
                     y_str,
                     in_vocabulary,
                     out_vocabulary,
                     domain_vocabulary,
                     lexicon,
                     reverse_input=OPTIONS.reverse_input,
                     sub_domain=sub_domain)
        data.append(ex)
    return data