Example #1
import sys

def main():
    path = sys.argv[1]
    # Open both output files together so each is closed when main() exits.
    with open(path + 'system.out', 'w') as fout, \
            open(path + '_entailment.out', 'w') as entailment_out:
        for sentence1, sentence2 in load_candidates(path):
            output = generate_natural_language(sentence1) + '\t' + generate_natural_language(sentence2) + '|||'

            # Align the two rendered sentences; on failure, log the error
            # and emit the pair with no intersections.
            try:
                alignments = linear_align(generate_natural_language(sentence1), generate_natural_language(sentence2))
            except Exception as e:
                print e
                print >>fout, output + '\t'
                continue

            # Propose extractions in both directions ('>' and '<') and
            # reduce each sentence accordingly.
            extraction1, extraction2 = generate_potential_extractions(sentence1, sentence2, alignments)
            extractions = [assign_slots(extraction1[0].replace('?', '>'), extraction1[4], extraction1[5]),
                           assign_slots(extraction2[0].replace('?', '<'), extraction2[5], extraction2[4])]
            extractions = parse_entailments(extractions, sentence1, sentence2, 's1:', 's2:')
            sentence1 = extract_sentence(sentence1, extractions, '>')
            sentence2 = extract_sentence(sentence2, extractions, '<')

            # Propose slot-assigned entailments in both directions.
            entailments = generate_potential_entailments(sentence1, sentence2, alignments)
            entailments = [assign_slots(entailment[0].replace('?', '>'), entailment[4], entailment[5]) for entailment in entailments] + \
                          [assign_slots(entailment[0].replace('?', '<'), entailment[5], entailment[4]) for entailment in entailments]

            entailments = parse_entailments(entailments, sentence1, sentence2, 's1:', 's2:')
            entailments, edge_entailments = create_entailments_dictionary(sentence1, sentence2, entailments)

            # Intersect the two sentences under the entailments and render
            # each result as natural language.
            intersections = intersection(sentence1, sentence2, entailments)
            intersections = sorted(set([generate_natural_language(s, edge_entailments=edge_entailments) for s in intersections]))
            output += '\t'.join(intersections)
            print >>entailment_out, entailments
            print >>entailment_out, edge_entailments
            print >>fout, output
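The driver expects a single path prefix on the command line. A minimal entry point, as a sketch (the script name is hypothetical):

if __name__ == '__main__':
    # Usage: python intersect.py PATH_PREFIX
    if len(sys.argv) < 2:
        sys.exit('usage: intersect.py PATH_PREFIX')
    main()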
Example #2
    def thanks(self, **kwargs):
        with self.lock:
            now = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
            user_id = kwargs['user_id']
            candidate_id = kwargs['candidate_id']
            alignments = kwargs['alignments']
            extractions = kwargs['extractions']
            entailments = kwargs['entailments']
            intersections = self.parse_intersections(kwargs)

            # Derive a completion code from a salted SHA-1 over the two ids.
            h = hashlib.new('sha1')
            h.update(''.join(('Lagi', user_id, candidate_id, 'Leshami')))
            code = ''.join((user_id, 'X', candidate_id, 'X', h.hexdigest()))

            annotation = '\t'.join([now, user_id, candidate_id, code, alignments, extractions, entailments] + intersections)
            # Append the annotation to the main log and to a backup copy.
            with open('web/annotations', 'a') as fout:
                print >>fout, annotation
                print annotation
            with open('../vsbkp/annotations', 'a') as fout:
                print >>fout, annotation
                print annotation

            # Fill in the thank-you page template.
            html = self.THANKS_HTML.replace('CODE', code)
            sentence1, sentence2 = self.candidates[candidate_id]
            html = html.replace('SENTENCE1', generate_natural_language(sentence1))
            html = html.replace('SENTENCE2', generate_natural_language(sentence2))
            html = html.replace('INTERSECTIONS', '</br>'.join(self.clean_intersections(intersections)))
        return html
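Because the code embeds a salted digest of the two ids, it can be re-derived and checked server-side. A minimal sketch; verify_code is hypothetical, and the naive split assumes neither id contains an 'X' (the lowercase hex digest itself never does):

import hashlib

def verify_code(code):
    # Hypothetical helper: re-derive the salted SHA-1 and compare.
    user_id, candidate_id, digest = code.split('X')
    h = hashlib.new('sha1')
    h.update(''.join(('Lagi', user_id, candidate_id, 'Leshami')))
    return h.hexdigest() == digest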
Example #3
import hashlib
from os import listdir
from os.path import join

def load_candidates(path):
    # Key each (sentence1, sentence2) pair by a SHA-1 of its rendering.
    candidates = []
    files = [join(path, f) for f in listdir(path)]
    for f in files:
        sentence1, sentence2, entailments = read_file(f)
        h = hashlib.new('sha1')
        h.update(generate_natural_language(sentence1) + ' ' + generate_natural_language(sentence2))
        candidates.append((h.hexdigest(), (sentence1, sentence2)))
    return dict(candidates)
Example #4
def load_candidates(path):
    candidates = []
    files = [join(path, f) for f in listdir(path)]
    for f in files:
        sentence1, sentence2, entailments = read_file(f)
        h = hashlib.new('sha1')
        h.update(
            generate_natural_language(sentence1) + ' ' +
            generate_natural_language(sentence2))
        candidates.append((h.hexdigest(), (sentence1, sentence2)))
    return dict(candidates)
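The returned dict is keyed by the pair's SHA-1 hex digest, which the web front end reuses as candidate_id (see Examples #2 and #8). A minimal usage sketch; the path is hypothetical:

candidates = load_candidates('data/candidates')
for candidate_id, (sentence1, sentence2) in candidates.iteritems():
    print candidate_id, generate_natural_language(sentence1)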
Example #5
def validate_alignments(sentence1, sentence2, actual_alignments):
    actual_alignments = [(map(int, seq1), map(int, seq2)) for seq1, seq2 in actual_alignments]
    sentence1 = generate_natural_language(sentence1).split(' ')
    sentence2 = generate_natural_language(sentence2).split(' ')
    high_prob_alignments = aligned_unigrams(sentence1, sentence2, 3)
    covered_alignments = sum(1 if covers_alignment(expected_alignment, actual_alignments) else 0 for expected_alignment in high_prob_alignments)
    print
    print actual_alignments
    print covered_alignments
    print len(high_prob_alignments)
    print
    # Add-constant smoothing: accept when roughly two-thirds of the expected
    # high-probability unigram alignments are covered by the actual ones.
    return ((float(covered_alignments) + 2) / (len(high_prob_alignments) + 3)) > 0.66
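The +2/+3 terms are add-constant smoothing, so very short sentences are not rejected outright: with zero expected alignments the score is 2/3, which just clears the 0.66 bar. A quick sanity check of the threshold, as a sketch:

def smoothed_coverage(covered, total):
    # Mirrors the final expression in validate_alignments.
    return (float(covered) + 2) / (total + 3)

assert smoothed_coverage(0, 0) > 0.66        # 2/3 ~ 0.667: trivially passes
assert smoothed_coverage(2, 3) > 0.66        # 4/6 ~ 0.667: passes
assert not (smoothed_coverage(1, 3) > 0.66)  # 3/6 = 0.5: fails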
Example #6
def generate_potential_entailments_local(subtree1, subtree2, sentence1,
                                         sentence2, prerequisite, alignments):
    entailment = generate_entailment_string(subtree1, subtree2)
    prerequisite = generate_entailment_string(
        *prerequisite) if prerequisite is not None else ''

    args1, template1 = generate_template(sentence1, subtree1, sentence2,
                                         subtree2, alignments)
    args2, template2 = generate_template(sentence2, subtree2, sentence1,
                                         subtree1, alignments)

    # Return an 8-tuple: entailment string, prerequisite, both sub-sentences
    # rendered as natural language, then per-side args and templates
    # (elements 0, 4 and 5 are the ones indexed at the call sites in Example #1).
    return entailment, prerequisite, generate_natural_language(
        sentence1, subtree1), generate_natural_language(
            sentence2, subtree2), args1, args2, template1, template2
Example #7
def validate_alignments(sentence1, sentence2, actual_alignments):
    actual_alignments = [(map(int, seq1), map(int, seq2))
                         for seq1, seq2 in actual_alignments]
    sentence1 = generate_natural_language(sentence1).split(' ')
    sentence2 = generate_natural_language(sentence2).split(' ')
    high_prob_alignments = aligned_unigrams(sentence1, sentence2, 3)
    covered_alignments = sum(
        1 if covers_alignment(expected_alignment, actual_alignments) else 0
        for expected_alignment in high_prob_alignments)
    print
    print actual_alignments
    print covered_alignments
    print len(high_prob_alignments)
    print
    return ((float(covered_alignments) + 2) /
            (len(high_prob_alignments) + 3)) > 0.66
Example #8
def index(self):
    with self.lock:
        html = self.LOGIN_HTML
        # Offer each candidate pair as a truncated two-sided preview.
        candidate_options = [
            (candidate_id, ''.join(
                (generate_natural_language(sentence1)[:20], '... | ',
                 generate_natural_language(sentence2)[:20], '...')))
            for candidate_id, (sentence1,
                               sentence2) in self.candidates.iteritems()
        ]
        candidate_options = [
            CANDIDATE_HTML.replace('CANDIDATE_ID', candidate_id).replace(
                'CANDIDATE', candidate)
            for candidate_id, candidate in candidate_options
        ]
        html = html.replace('CANDIDATES', ''.join(candidate_options))
    return html
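CANDIDATE_HTML itself is not part of these excerpts. A plausible stand-in that carries the two placeholders replaced above, purely illustrative:

# Hypothetical template; the real markup may differ.
CANDIDATE_HTML = '<option value="CANDIDATE_ID">CANDIDATE</option>'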
Example #9
def index(self):
    with self.lock:
        html = self.LOGIN_HTML
        candidate_options = [(candidate_id, ''.join((generate_natural_language(sentence1)[:20], '... | ', generate_natural_language(sentence2)[:20], '...')))
                             for candidate_id, (sentence1, sentence2) in self.candidates.iteritems()]
        candidate_options = [CANDIDATE_HTML.replace('CANDIDATE_ID', candidate_id).replace('CANDIDATE', candidate)
                             for candidate_id, candidate in candidate_options]
        html = html.replace('CANDIDATES', ''.join(candidate_options))
    return html
Example #10
def generate_template(hypothesis_tree, hypothesis_subtree, premise_tree,
                      premise_subtree, alignments):
    hypothesis_tree = DynamicTree(None, hypothesis_tree.root)
    outgoing_edges = get_edges_from_subtree_to_tree(hypothesis_subtree,
                                                    hypothesis_tree)
    aligned_edges = {
        edge: get_aligned_edge_id(edge, alignments)
        for edge in outgoing_edges
    }
    interesting_edges = [
        edge for edge in outgoing_edges
        if edge.is_slot() or aligned_edges[edge] is not None
    ]

    # Generate the original arguments (children) of the hypothesis subtree
    args = [(edge.id_.split(':')[1], generate_natural_language(edge.modifier),
             aligned_edges[edge]) for edge in interesting_edges]

    # Replace hypothesis subtree's children with placeholders
    slot_i = 0
    for edge in outgoing_edges:
        if edge in interesting_edges:
            edge.modifier.children = []
            edge.modifier.word = 'SLOT' + str(slot_i) + '!'
            slot_i += 1
        else:
            edge.head.children.remove(edge)

    # Replace hypothesis subtree's parents with the (aligned) premise subtree's parents
    hypothesis_subtree_root = hypothesis_tree.find_node(
        hypothesis_subtree.root)
    if premise_tree.root == premise_subtree.root:
        premise_tree = DynamicTree(None, hypothesis_subtree_root)
    else:
        premise_tree = DynamicTree(None, premise_tree.root)
        premise_tree.find_parent_edge(
            premise_subtree.root).modifier = hypothesis_subtree_root

    # Generate the template
    template = generate_natural_language(premise_tree, hypothesis_subtree)

    return args, template
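The SLOT placeholders written into the tree surface verbatim in the rendered template. Purely as an illustration of the shapes this returns (all values hypothetical):

# Hypothetical result for a pair sharing a 'born in Hawaii' subtree:
# args holds one (edge label, rendered modifier, aligned edge id) triple
# per slot, and the template embeds the matching SLOTi! markers.
args = [('nsubj', 'Obama', 3), ('nmod', 'Hawaii', 7)]
template = 'SLOT0! was born in SLOT1!'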
Example #11
def intersection(self, sentence1, sentence2, entailments_str):
    entailments = self.get_real_entailments(sentence1, sentence2,
                                            entailments_str)
    entailments, edge_entailments = create_entailments_dictionary(
        sentence1, sentence2, entailments)
    # The bare name resolves to the module-level intersection() used in
    # Example #1, not to this method.
    intersections = intersection(sentence1, sentence2, entailments)
    return sorted(
        set([
            generate_natural_language(s, edge_entailments=edge_entailments)
            for s in intersections
        ]))
Example #12
    def thanks(self, **kwargs):
        with self.lock:
            now = datetime.datetime.strftime(datetime.datetime.now(),
                                             '%Y-%m-%d %H:%M:%S')
            user_id = kwargs['user_id']
            candidate_id = kwargs['candidate_id']
            alignments = kwargs['alignments']
            extractions = kwargs['extractions']
            entailments = kwargs['entailments']
            intersections = self.parse_intersections(kwargs)

            h = hashlib.new('sha1')
            h.update(''.join(('Lagi', user_id, candidate_id, 'Leshami')))
            code = ''.join((user_id, 'X', candidate_id, 'X', h.hexdigest()))

            annotation = '\t'.join([
                now, user_id, candidate_id, code, alignments, extractions,
                entailments
            ] + intersections)
            with open('web/annotations', 'a') as fout:
                print >> fout, annotation
                print annotation
            with open('../vsbkp/annotations', 'a') as fout:
                print >> fout, annotation
                print annotation

            html = self.THANKS_HTML.replace('CODE', code)
            sentence1, sentence2 = self.candidates[candidate_id]
            html = html.replace('SENTENCE1',
                                generate_natural_language(sentence1))
            html = html.replace('SENTENCE2',
                                generate_natural_language(sentence2))
            html = html.replace(
                'INTERSECTIONS',
                '</br>'.join(self.clean_intersections(intersections)))
        return html
Example #13
def generate_sentence_html(sentence, si):
    return ' '.join([
        TOKEN_HTML.replace('SINDEX', str(si + 1)).replace(
            'TINDEX', str(ti)).replace('TOKEN', t) for ti, t in enumerate(
                generate_natural_language(sentence).split(' '))
    ])
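Likewise, TOKEN_HTML is not shown in these excerpts. A plausible stand-in with the three placeholders used above, purely illustrative:

# Hypothetical template: one addressable span per token.
TOKEN_HTML = '<span class="token" id="sSINDEX-tTINDEX">TOKEN</span>'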
Example #14
def generate_sentence_html(sentence, si):
    return ' '.join([TOKEN_HTML.replace('SINDEX', str(si+1)).replace('TINDEX', str(ti)).replace('TOKEN', t)
                     for ti, t in enumerate(generate_natural_language(sentence).split(' '))])
Example #15
def intersection(self, sentence1, sentence2, entailments_str):
    entailments = self.get_real_entailments(sentence1, sentence2, entailments_str)
    entailments, edge_entailments = create_entailments_dictionary(sentence1, sentence2, entailments)
    intersections = intersection(sentence1, sentence2, entailments)
    return sorted(set([generate_natural_language(s, edge_entailments=edge_entailments) for s in intersections]))