def setUp(self):
     """
     Populate self.tasks with one Task.read result per language.

     The dictionary is keyed by the language code ('E' or 'J'). For each
     language, the query file path returned by create_query_subset is
     handed to create_tmp_intent_file, and both resulting paths are then
     passed to Task.read along with the iUnit file and the INDX / HTML
     document directories.
     """
     self.tasks = {}
     for lang_dir, lang_code in (('en', 'E'), ('ja', 'J')):
         # Paths that depend on both the directory name and the code.
         queries_tsv = ('./data/MC2-training/%s/1C2-%s-queries.tsv'
                        % (lang_dir, lang_code))
         iunits_tsv = ('./data/MC2-training/%s/1C2-%s-iunits.tsv'
                       % (lang_dir, lang_code))
         # Document directories depend on the language code only.
         index_dir = './data/MC2-training-documents/1C2-%s.INDX/' % lang_code
         html_dir = './data/MC2-training-documents/1C2-%s.HTML/' % lang_code

         queryfilepath = create_query_subset(queries_tsv, index_dir)
         intentfilepath = create_tmp_intent_file(queryfilepath)
         self.tasks[lang_code] = Task.read(
             queryfilepath,
             iunits_tsv,
             index_dir,
             html_dir,
             intentfilepath,
         )
# Example #2
# 0
 def test_task_read(self):
     """
     Task.read called with an intent file.

     Checks the number of tasks read, then the query ID of the first
     task, and finally the count and IDs of that task's intents.
     """
     iunits_tsv = "./data/MC2-training/en/1C2-E-iunits.tsv"
     index_dir = "./data/MC2-training-documents/1C2-E.INDX/"
     html_dir = "./data/MC2-training-documents/1C2-E.HTML/"
     tasks = Task.read(
         self.queryfilepath,
         iunits_tsv,
         index_dir,
         html_dir,
         self.intentfilepath,
     )
     self.assertEqual(len(tasks), 5)

     # Only the first task is inspected in detail.
     first_task = tasks[0]
     self.assertEqual(first_task.query.qid, "1C2-E-0001")

     intents = first_task.intents
     self.assertEqual(len(intents), 5)
     first_intent = intents[0]
     self.assertEqual(first_intent.qid, "1C2-E-0001")
     self.assertEqual(first_intent.iid, "1C2-E-0001-INTENT0001")
# Example #3
# 0
 def test_task_read(self):
     '''
     Task.read called without an intent file.

     Checks the number of tasks read, then the first task's query ID,
     its iUnits (count and first entry) and its indices (count and
     first entry).
     '''
     iunits_tsv = './data/MC2-training/en/1C2-E-iunits.tsv'
     index_dir = './data/MC2-training-documents/1C2-E.INDX/'
     html_dir = './data/MC2-training-documents/1C2-E.HTML/'
     tasks = Task.read(self.queryfilepath, iunits_tsv, index_dir, html_dir)
     self.assertEqual(len(tasks), 5)

     # Only the first task is inspected in detail.
     first_task = tasks[0]
     self.assertEqual(first_task.query.qid, '1C2-E-0001')

     iunits = first_task.iunits
     self.assertEqual(len(iunits), 19)
     first_iunit = iunits[0]
     self.assertEqual(first_iunit.qid, '1C2-E-0001')
     self.assertEqual(first_iunit.uid, '1C2-E-0001-0001')

     indices = first_task.indices
     self.assertEqual(len(indices), 213)
     first_index = indices[0]
     self.assertEqual(first_index.qid, '1C2-E-0001')
     self.assertEqual(first_index.rank, 1)
# Example #4
# 0
        Index.qid: Query ID
        Index.filepath: filepath of an HTML file
        Index.rank: rank in a search engine result page
        Index.title: webpage title
        Index.url: webpage url
        Index.body: summary of the webpage
        """

    def rank(self, task):
        """
        Return a list of (iUnit, score) pairs for the given task.

        This placeholder implementation pairs every element of
        task.iunits with the same score, 0, keeping the order in which
        task.iunits yields them. Replace the body with a real scoring
        method.
        """
        placeholder_score = 0
        return [(iunit, placeholder_score) for iunit in task.iunits]


if __name__ == "__main__":
    from mobileclick import Task

    # Input locations, in the positional order Task.read is called with.
    queries_path = "data/MC2-training/en/1C2-E-queries.tsv"
    iunits_path = "data/MC2-training/en/1C2-E-iunits.tsv"
    index_dir = "data/MC2-training-documents/1C2-E.INDX"
    html_dir = "data/MC2-training-documents/1C2-E.HTML"

    tasks = Task.read(queries_path, iunits_path, index_dir, html_dir)

    # Produce a run from the tasks with the ranking method and save it;
    # "./" is the argument handed to run.save.
    method = YourRankingMethod()
    run = method.generate_run("YourRun", "This is your run", tasks)
    run.save("./")
        summary.add(iunit6) # added to the first layer
        summary.add(iunit7, intent1.iid) # added to the second layer
        summary.add(intent3) # added to the first layer
        summary.add(iunit8, intent3.iid) # added to the second layer

        The resultant summary is
            First layer:
                iunit1, iunit2, intent1, intent2, iunit6, intent3
            Second layer:
                intent1: iunit3, iunit4, iunit7
                intent2: iunit3, iunit5
                intent3: iunit8

        e.g. Random summarization method
        return Summary(task.query.qid, task.iunits)
        '''
        return Summary(task.query.qid, task.iunits)

if __name__ == '__main__':
    from mobileclick import Task

    # Input locations, in the positional order Task.read is called with;
    # this script also supplies an intents file as the fifth argument.
    queries_path = "data/MC2-test/en/MC2-E-queries.tsv"
    iunits_path = "data/MC2-test/en/MC2-E-iunits.tsv"
    index_dir = "data/MC2-test-documents/MC2-E.INDX"
    html_dir = "data/MC2-test-documents/MC2-E.HTML"
    intents_path = "data/MC2-test/en/MC2-E-intents.tsv"

    tasks = Task.read(
        queries_path,
        iunits_path,
        index_dir,
        html_dir,
        intents_path,
    )

    # Produce a run from the tasks with the summarization method and save
    # it; './' is the argument handed to run.save.
    method = YourSummarizationMethod()
    run = method.generate_run("YourRun", "This is your run", tasks)
    run.save('./')