def main(): corpus = readCorpus.Corpus("dataCollection/data/corpusCommandsForVideoSmallFilesOnly/") filtered_annotations = [x for x in corpus.assignments if (x.scenario.platform == "forklift" and x.scenario.isSimulated == True)] assignments = set(a.scenario for a in filtered_annotations) for a in assignments: print "/".join(a.lcmlog.split("/")[-2:])
def main(): corpus = readCorpus.Corpus( "dataCollection/data/corpusCommandsForVideoSmallFilesOnly/") iserVerbs = ["meet", "bring", "avoid", "follow"] for annotation in corpus.annotations: scenario = annotation.scenario for verb in iserVerbs: if verb in annotation.command: print verb, scenario.youtubeId, scenario.name, print annotation.command
def main(): corpus = readCorpus.Corpus( "../mturk_data_collector/data/corpusCommandsForVideoSmallFilesOnly/") annotators = set() scenarios = set() commands = [] words = [] commands_per_scenario = collections.defaultdict(lambda: 0) discourses = [] current_scenario = None for annotation in corpus.assignments: scenario = annotation.scenario if current_scenario != scenario: current_scenario = scenario print "scenario", scenario.name if scenario.platform == "forklift" and scenario.isSimulated and len( annotation.command) <= 150: scenarios.add(scenario) annotators.add(annotation.workerId) words.extend(annotation.command.split()) discourses.append(annotation.command) #print "*", annotation.command, scenario.youtubeUrl commands.append(annotation.command) print annotation.command commands_per_scenario[scenario.name] += 1 print len(annotators), "annotators" print len(scenarios), "videos" print len(commands), "commands" print len(words), "words" print len(set(words)), "unique words" posTagger = pos_tagger.makeTagger() pos_histograms(discourses, posTagger) for scenario in sorted(scenarios): print scenario, commands_per_scenario[scenario.name] for word in [ "put", "take", "bring", "go", "move", "to", "on", "across", "the", "cymbal" ]: print word + ":", print len([w for w in words if w.lower() == word]) mean_commands_per_scenario = na.mean(commands_per_scenario.values()) print "mean commands", mean_commands_per_scenario
def main(): parser = OptionParser() parser.add_option("--source_corpus", dest="source_corpus", help="The file or directory to read from") parser.add_option("--dest_corpus", dest="dest_corpus", help="The yaml file to which data will be written") (options, args) = parser.parse_args() corpus = readCorpus.Corpus(options.source_corpus) extractor = Extractor() #oldCorpus = annotationIo.load("%s/dataAnnotation/data/forkliftMturkEsdcs.stefie10.groundings.withPaths.yaml" % os.environ['FORKLIFT_HOME']) #oldAssignmentIds = set(a.assignmentId for a in oldCorpus) #print len(oldAssignmentIds), "old ids" yamlData = [] print len(corpus.assignments), "assignments" filteredAnnotations = [ x for x in corpus.assignments if (x.scenario.platform == "forklift" and x.scenario.isSimulated == True) ] print len(filteredAnnotations), "filtered annotations" output_annotations = [] for i, a in enumerate(filteredAnnotations): esdcs = extractor.extractEsdcs(a.command) assignmentId = a.assignmentId agent = a.agent context = a.context esdcSource = [assignmentId for esdc in esdcs.flattenedEsdcs] output_annotations.append( Annotation(assignmentId=assignmentId, esdcs=esdcs, objectGroundings=None, groundingIsCorrect=None, agent=agent, context=context, esdcSource=esdcSource)) esdcYaml = toYaml(esdcs) print "******" print a, esdcYaml print esdcs yamlData.append({"command": esdcYaml, "assignmentId": a.assignmentId}) print "dumped", i, "commands." yaml.dump(yamlData, open(options.dest_corpus, "w"))
def main(): import sys fname = sys.argv[1] assignments = readCorpus.Corpus( "dataCollection/data/corpusCommandsForVideoSmallFilesOnly/") corpus = annotationIo.load(fname) word_cnt = 0 workers = set() scenarios = set() for annotation in corpus: assignment = assignments.assignmentForId(annotation.assignmentId) word_cnt += len(annotation.entireText.split()) workers.add(assignment.workerId) scenarios.add(assignment.scenario) if assignment.scenario.name == "put_tire_pallet_on_loaded_truck": print "command", assignment.scenario.name, annotation.entireText print len(scenarios), "scenarios" print len(workers), "annotators" print word_cnt, "words" print len(corpus), "commands"
def main():
    # Build pickled training/test ContinuousDatasets from annotation corpora.
    # Examples are loaded from positive / negative / labeled / unlabeled
    # corpus files, featurized by a user-selected extractor class, then
    # partitioned into train/test according to the --split strategy.
    parser = OptionParser()
    parser.add_option("--outfile_training",
                      dest="training_fname",
                      help="Training Output Filename")
    parser.add_option("--outfile_test",
                      dest="testing_fname",
                      help="Test Output Filename")
    parser.add_option(
        "--infile_positive",
        dest="positive_fnames",
        action="append",
        default=[],
        help="Positive Filename; default to True if isGroundingCorrect is None"
    )
    parser.add_option(
        "--infile_negative",
        dest="negative_fnames",
        action="append",
        default=[],
        help="Negative Filename; default to False if isGroundingCorrect is None"
    )
    parser.add_option(
        "--infile_labeled",
        dest="labeled_fnames",
        action="append",
        default=[],
        help="Labeled examples; skip if isGroundingCorrect is None")
    parser.add_option("--infile_unlabeled",
                      dest="unlabeled_fnames",
                      action="append",
                      default=[],
                      help="unlabeld fnames")
    parser.add_option("--feature_extractor",
                      dest="feature_extractor",
                      help="Feature Extractor Class")
    parser.add_option("--split",
                      dest="split",
                      type="string",
                      help="'random' to split randomly; 'scenario' to split " +
                      "by scenario.")
    parser.add_option(
        "--training_examples",
        dest="training_examples",
        action="append",
        help=
        "Examples that are in the training set; others go in the test set. Can be passed more than once. "
    )
    (options, args) = parser.parse_args()

    try:
        # These imports bring the feature extractor classes into scope so
        # that the eval() below can resolve the class named on the command
        # line.
        from g3.feature_extractor.esdc_features import EsdcFeatures
        from g3.feature_extractor.esdc_flattened_features import EsdcFlattenedFeatures
        from g3.feature_extractor.grounded_features import GGGFeatures
        from g3.feature_extractor.rl_features import RLFeatures
        from g3.feature_extractor.bolt_features import BoltFeatures
        from g3.feature_extractor.ikea_features import IkeaFeatures
        from g3.feature_extractor.sr_features import SrFeatures
        #feature_extractor = semantic_map.esdc_semantic_map2.esdc_semantic_map()
        # NOTE(review): eval() of a command-line argument — acceptable for a
        # trusted research tool, but never expose this to untrusted input.
        feature_extractor_cls = eval(options.feature_extractor)
        feature_extractor = feature_extractor_cls()
    except:
        # Report which extractor name failed, then re-raise the real error.
        print "error doing", options.feature_extractor
        raise

    observations = list()
    # Positive corpora: unlabeled groundings default to correct (True).
    for positive_fname in options.positive_fnames:
        corpus = annotationIo.load(positive_fname)
        new_examples = generate_examples(basename(positive_fname),
                                         corpus,
                                         feature_extractor,
                                         default_class_value=True)
        if len(new_examples) == 0:
            raise ValueError("No examples from" + ` positive_fname `)
        observations.extend(new_examples)
    # Negative corpora: unlabeled groundings default to incorrect (False).
    for negative_fname in options.negative_fnames:
        corpus = annotationIo.load(negative_fname)
        new_examples = generate_examples(basename(negative_fname),
                                         corpus,
                                         feature_extractor,
                                         default_class_value=False)
        if len(new_examples) == 0:
            raise ValueError("No examples from" + ` negative_fname `)
        observations.extend(new_examples)
    # Labeled corpora: keep only examples with an explicit label (no default).
    for labeled_fname in options.labeled_fnames:
        corpus = annotationIo.load(labeled_fname, check=False)
        new_examples = generate_examples(basename(labeled_fname),
                                         corpus,
                                         feature_extractor,
                                         default_class_value=None)
        if len(new_examples) == 0:
            raise ValueError("No examples from" + ` labeled_fname `)
        observations.extend(new_examples)
    # Unlabeled corpora: force the default (None) class onto every example.
    for unlabeled_fname in options.unlabeled_fnames:
        corpus = annotationIo.load(unlabeled_fname)
        new_examples = generate_examples(basename(unlabeled_fname),
                                         corpus,
                                         feature_extractor,
                                         default_class_value=None,
                                         force_default_class_value=True)
        if len(new_examples) == 0:
            raise ValueError("No examples from" + ` unlabeled_fname `)
        observations.extend(new_examples)

    if options.split == "scenario":
        # Group by scenario: 70% of scenario names (shuffled) go to training,
        # so no scenario appears in both sets.  assignmentId may carry a
        # "_suffix"; the split("_")[0] strips it back to the MTurk id.
        mturkCorpus = readCorpus.Corpus(
            "%s/data/corpusCommandsForVideoSmallFilesOnly/" % SLU_HOME)
        scenario_names = list(
            set(
                mturkCorpus.assignmentForId(
                    obs.annotation.assignmentId.split("_")[0]).scenario.name
                for obs in observations))
        random.shuffle(scenario_names)
        n_training_scenarios = int(ceil(len(scenario_names) * 0.7))
        training_scenarios = scenario_names[:n_training_scenarios]
        testing_scenarios = scenario_names[n_training_scenarios:]
        training = [
            o for o in observations if mturkCorpus.assignmentForId(
                o.annotation.assignmentId.split("_")[0]).scenario.name in
            training_scenarios
        ]
        testing = [
            o for o in observations if mturkCorpus.assignmentForId(
                o.annotation.assignmentId.split("_")[0]).scenario.name in
            testing_scenarios
        ]
    elif options.split == "annotation":
        '''
        Splits the examples, grouped by annotation.  If the spatial relations
        corpus is included, that data will be in the training set only.
        '''
        training = []
        testing = []
        sr_ids = []
        ids = []
        # Separate spatial-relations ("sr_") annotation ids from the rest;
        # sr examples are forced into training below.
        for o in observations:
            aid = o.annotation.id
            if ((aid not in ids) and ("sr_" not in aid)):
                ids.append(aid)
            elif "sr_" in aid:
                sr_ids.append(aid)
        random.shuffle(ids)
        n_training_ids = int(ceil(len(ids) * 0.7))
        training_ids = ids[:n_training_ids]
        testing_ids = ids[n_training_ids:]
        training = [
            o for o in observations if o.annotation.id in training_ids
            or o.annotation.assignmentId in sr_ids
        ]
        testing = [o for o in observations if o.annotation.id in testing_ids]
    elif options.split == "random":
        # Plain 70/30 shuffle over individual examples.
        random.shuffle(observations)
        n_training = int(ceil(len(observations) * 0.7))
        training = observations[0:n_training]
        testing = observations[n_training:]
    elif options.split == "labeled_annotation":
        # Training membership comes from previously saved example datasets;
        # match on annotation id, or on its "_"-stripped prefix as fallback.
        training_ids = set()
        training = []
        testing = []
        for training_fname in options.training_examples:
            ds = pickle_util.load(training_fname)
            for ex in ds.observations:
                training_ids.add(ex.annotation.id)
                training_ids.add(ex.annotation.id.split("_")[0])
        print "training", training_ids
        for example in observations:
            if example.annotation.id in training_ids:
                training.append(example)
            else:
                aid = example.annotation.id.split("_")[0]
                if aid in training_ids:
                    training.append(example)
                else:
                    print "skipping", example.annotation.id, aid
                    testing.append(example)
        print "labeled training", len(training)
        print "labeled testing", len(testing)
    elif options.split == "labeled_file":
        # Route by source filename substring; anything ambiguous defaults to
        # training.
        training = []
        testing = []
        for example in observations:
            if "training" in example.annotation.fname:
                training.append(example)
            elif "testing" in example.annotation.fname:
                testing.append(example)
            else:
                training.append(example)
    elif options.split == "labeled":
        # Like labeled_annotation, but matches on the example id itself.
        training_ids = set()
        training = []
        testing = []
        for training_fname in options.training_examples:
            ds = pickle_util.load(training_fname)
            for ex in ds.observations:
                print "id", ex.id
                training_ids.add(ex.id)
        for example in observations:
            print "example", example.id
            if example.id in training_ids:
                training.append(example)
            else:
                testing.append(example)
    else:
        raise ValueError("Unexpected split type: " + ` options.split `)

    training_dataset = ContinuousDataset(training, feature_extractor_cls)
    testing_dataset = ContinuousDataset(testing, feature_extractor_cls)
    print "saving ", len(training), " examples to:", options.training_fname
    pickle_util.save(options.training_fname, training_dataset)
    print "saving ", len(testing), " examples to:", options.testing_fname
    pickle_util.save(options.testing_fname, testing_dataset)
def __init__(self, mturkCorpus=None, stateType=None, ground_children=False):
    # Main annotation-GUI window: embeds a matplotlib canvas inside a Qt
    # window, builds the data models for the various tables/trees, and wires
    # up all signal/slot connections.  Statement order matters: setupUi must
    # run before any widget attribute is touched, and the table models must
    # exist before their selectionModel() signals are connected.
    #
    # mturkCorpus: optional path to an MTurk corpus directory; loaded into
    #   self.corpus when given, otherwise self.corpus stays None.
    # stateType: opaque state descriptor stored for later use — semantics
    #   not visible here; confirm against callers.
    # ground_children: flag stored on self; consumed elsewhere in the class.
    QMainWindow.__init__(self)
    if mturkCorpus != None:
        self.corpus = readCorpus.Corpus(mturkCorpus)
    else:
        self.corpus = None
    self.ground_children = ground_children
    self.artists = []           # matplotlib artists currently drawn
    self.state = None
    self.stateType = stateType
    self.artistsDict = {}
    self.pathNodes = {}
    self.currPath = []          # path nodes accumulated in path mode
    self.setupUi(self)
    # Re-parent a matplotlib figure canvas into the Qt layout, keeping the
    # old parent around, and attach the standard navigation toolbar.
    self.figure = mpl.figure()
    self.axes = self.figure.gca()
    self.axes.set_aspect("equal")
    self.oldParent = self.figure.canvas.parent()
    self.figure.canvas.setParent(self)
    self.matplotlibFrame.layout().addWidget(self.figure.canvas)
    self.toolbar = NavigationToolbar2QT(self.figure.canvas, self)
    self.addToolBar(self.toolbar)
    # self.limits = [0, 60, 20, 60]
    # Initial axis limits [xmin, xmax, ymin, ymax]; presumably lon/lat
    # bounds for the default map view — confirm against the data.
    self.limits = [-71.15, -71.05, 42.35, 42.41]
    self.entireText = None
    self.addPathButton.setEnabled(False)
    self.restoreLimits()
    # Companion 3D context viewer in its own window.
    self.contextWindow = context3d.MainWindow()
    self.contextWindow.show()
    self.connect(self.contextWindow.glWidget, SIGNAL("selectedGrounding()"),
                 self.selectedGrounding)
    self.connect(self.sourceEdit, SIGNAL("editingFinished()"),
                 self.updateSource)
    self.connect(self.shouldDrawAgentCheckBox, SIGNAL("stateChanged(int)"),
                 self.draw)
    self.connect(self.annotationFilter, SIGNAL("editingFinished()"),
                 self.filterAnnotations)
    # matplotlib-side events (canvas redraws and artist picking).
    self.figure.canvas.mpl_connect('draw_event', self.updateLimits)
    self.figure.canvas.mpl_connect('pick_event', self.onpick)
    # Data models backing the Qt views; must be created before the
    # selectionModel() connections below.
    self.esdcModel = esdcTreeModel.Model(self.esdcTreeView)
    self.annotationModel = annotationModel.Model(self.annotationTable)
    self.groundingsModel = groundingsModel.Model(self.groundingsTable)
    self.contextModel = groundingsModel.Model(self.contextTable)
    self.pathSegmentsModel = groundingsModel.Model(self.pathSegmentsTable)
    self.largePathSegmentsModel = groundingsModel.Model(
        self.largePathSegmentsTable)
    # Two segmenters: fine-grained uniform segments and coarser
    # pause-delimited segments.
    self.path_segmenter = uniform_segmenter.Segmenter()
    self.large_path_segmenter = pause_segmenter.Segmenter()
    print "connecting"
    self.connect(self.pathSegmentsTable.selectionModel(),
                 SIGNAL("selectionChanged (QItemSelection,QItemSelection)"),
                 self.draw)
    self.connect(self.largePathSegmentsTable.selectionModel(),
                 SIGNAL("selectionChanged (QItemSelection,QItemSelection)"),
                 self.selectLargePathSegments)
    self.connect(self.esdcTreeView.selectionModel(),
                 SIGNAL("selectionChanged (QItemSelection,QItemSelection)"),
                 self.selectEsdc)
    self.connect(self.annotationTable.selectionModel(),
                 SIGNAL("selectionChanged (QItemSelection,QItemSelection)"),
                 self.selectAnnotation)
    self.connect(self.contextTable.selectionModel(),
                 SIGNAL("selectionChanged (QItemSelection,QItemSelection)"),
                 self.selectContext)
    self.connect(self.groundingsTable.selectionModel(),
                 SIGNAL("selectionChanged (QItemSelection,QItemSelection)"),
                 self.selectGrounding)
    # Button actions.
    self.connect(self.addAgentPathSegmentsButton, SIGNAL("clicked()"),
                 self.addAgentPathSegments)
    self.connect(self.updateStateButton, SIGNAL("clicked()"),
                 self.updateState)
    self.connect(self.addGroundingButton, SIGNAL("clicked()"),
                 self.addGrounding)
    self.connect(self.clearGroundingsButton, SIGNAL("clicked()"),
                 self.clearGroundings)
    self.connect(self.addPathButton, SIGNAL("clicked()"), self.addPath)
    # Menu / keyboard actions.
    self.connect(self.actionNextEsdc, SIGNAL("triggered()"), self.nextEsdc)
    self.connect(self.actionClearGroundings, SIGNAL("triggered()"),
                 self.clearGroundings)
    self.connect(self.actionNextNan, SIGNAL("triggered()"),
                 self.selectNanAnnotation)
    self.connect(self.actionPreviousEsdc, SIGNAL("triggered()"),
                 self.previousEsdc)
    self.connect(self.actionNextCommand, SIGNAL("triggered()"),
                 self.nextCommand)
    self.connect(self.actionPreviousCommand, SIGNAL("triggered()"),
                 self.previousCommand)
    self.connect(self.actionSave, SIGNAL("triggered()"), self.save)
    self.connect(self.actionLoad, SIGNAL("triggered()"), self.load)
    self.connect(self.actionPath_Nodes, SIGNAL("triggered()"),
                 self.path_mode)
    self.connect(self.actionGroundings, SIGNAL("triggered()"),
                 self.groundings_mode)
    self.connect(self.classComboBox, SIGNAL("currentIndexChanged(int)"),
                 self.updateClass)
    self.connect(self.actionNextEmptyAnnotation, SIGNAL("triggered()"),
                 self.selectNextEmptyAnnotation)
    self.connect(self.actionGroundingIsCorrect, SIGNAL("triggered()"),
                 self.setGroundingIsCorrect)
    self.connect(self.actionGroundingIsNotCorrect, SIGNAL("triggered()"),
                 self.setGroundingIsNotCorrect)