def main(): corpus = readCorpus.Corpus("dataCollection/data/corpusCommandsForVideoSmallFilesOnly/") filtered_annotations = [x for x in corpus.assignments if (x.scenario.platform == "forklift" and x.scenario.isSimulated == True)] assignments = set(a.scenario for a in filtered_annotations) for a in assignments: print "/".join(a.lcmlog.split("/")[-2:])
def main(): corpus = readCorpus.Corpus( "dataCollection/data/corpusCommandsForVideoSmallFilesOnly/") iserVerbs = ["meet", "bring", "avoid", "follow"] for annotation in corpus.annotations: scenario = annotation.scenario for verb in iserVerbs: if verb in annotation.command: print verb, scenario.youtubeId, scenario.name, print annotation.command
def main(): corpus = readCorpus.Corpus( "../mturk_data_collector/data/corpusCommandsForVideoSmallFilesOnly/") annotators = set() scenarios = set() commands = [] words = [] commands_per_scenario = collections.defaultdict(lambda: 0) discourses = [] current_scenario = None for annotation in corpus.assignments: scenario = annotation.scenario if current_scenario != scenario: current_scenario = scenario print "scenario", scenario.name if scenario.platform == "forklift" and scenario.isSimulated and len( annotation.command) <= 150: scenarios.add(scenario) annotators.add(annotation.workerId) words.extend(annotation.command.split()) discourses.append(annotation.command) #print "*", annotation.command, scenario.youtubeUrl commands.append(annotation.command) print annotation.command commands_per_scenario[scenario.name] += 1 print len(annotators), "annotators" print len(scenarios), "videos" print len(commands), "commands" print len(words), "words" print len(set(words)), "unique words" posTagger = pos_tagger.makeTagger() pos_histograms(discourses, posTagger) for scenario in sorted(scenarios): print scenario, commands_per_scenario[scenario.name] for word in [ "put", "take", "bring", "go", "move", "to", "on", "across", "the", "cymbal" ]: print word + ":", print len([w for w in words if w.lower() == word]) mean_commands_per_scenario = na.mean(commands_per_scenario.values()) print "mean commands", mean_commands_per_scenario
def main(): parser = OptionParser() parser.add_option("--source_corpus", dest="source_corpus", help="The file or directory to read from") parser.add_option("--dest_corpus", dest="dest_corpus", help="The yaml file to which data will be written") (options, args) = parser.parse_args() corpus = readCorpus.Corpus(options.source_corpus) extractor = Extractor() #oldCorpus = annotationIo.load("%s/dataAnnotation/data/forkliftMturkEsdcs.stefie10.groundings.withPaths.yaml" % os.environ['FORKLIFT_HOME']) #oldAssignmentIds = set(a.assignmentId for a in oldCorpus) #print len(oldAssignmentIds), "old ids" yamlData = [] print len(corpus.assignments), "assignments" filteredAnnotations = [ x for x in corpus.assignments if (x.scenario.platform == "forklift" and x.scenario.isSimulated == True) ] print len(filteredAnnotations), "filtered annotations" output_annotations = [] for i, a in enumerate(filteredAnnotations): esdcs = extractor.extractEsdcs(a.command) assignmentId = a.assignmentId agent = a.agent context = a.context esdcSource = [assignmentId for esdc in esdcs.flattenedEsdcs] output_annotations.append( Annotation(assignmentId=assignmentId, esdcs=esdcs, objectGroundings=None, groundingIsCorrect=None, agent=agent, context=context, esdcSource=esdcSource)) esdcYaml = toYaml(esdcs) print "******" print a, esdcYaml print esdcs yamlData.append({"command": esdcYaml, "assignmentId": a.assignmentId}) print "dumped", i, "commands." yaml.dump(yamlData, open(options.dest_corpus, "w"))
def main(): import sys fname = sys.argv[1] assignments = readCorpus.Corpus( "dataCollection/data/corpusCommandsForVideoSmallFilesOnly/") corpus = annotationIo.load(fname) word_cnt = 0 workers = set() scenarios = set() for annotation in corpus: assignment = assignments.assignmentForId(annotation.assignmentId) word_cnt += len(annotation.entireText.split()) workers.add(assignment.workerId) scenarios.add(assignment.scenario) if assignment.scenario.name == "put_tire_pallet_on_loaded_truck": print "command", assignment.scenario.name, annotation.entireText print len(scenarios), "scenarios" print len(workers), "annotators" print word_cnt, "words" print len(corpus), "commands"
def main():
    # Build pickled training/test ContinuousDatasets from annotation corpora.
    # Examples are loaded from positive / negative / labeled / unlabeled
    # corpus files, featurized by a user-selected extractor class, then
    # partitioned into train/test according to the --split strategy.
    parser = OptionParser()
    parser.add_option("--outfile_training",
                      dest="training_fname",
                      help="Training Output Filename")
    parser.add_option("--outfile_test",
                      dest="testing_fname",
                      help="Test Output Filename")
    parser.add_option(
        "--infile_positive",
        dest="positive_fnames",
        action="append",
        default=[],
        help="Positive Filename; default to True if isGroundingCorrect is None"
    )
    parser.add_option(
        "--infile_negative",
        dest="negative_fnames",
        action="append",
        default=[],
        help="Negative Filename; default to False if isGroundingCorrect is None"
    )
    parser.add_option(
        "--infile_labeled",
        dest="labeled_fnames",
        action="append",
        default=[],
        help="Labeled examples; skip if isGroundingCorrect is None")
    parser.add_option("--infile_unlabeled",
                      dest="unlabeled_fnames",
                      action="append",
                      default=[],
                      help="unlabeld fnames")
    parser.add_option("--feature_extractor",
                      dest="feature_extractor",
                      help="Feature Extractor Class")
    parser.add_option("--split",
                      dest="split",
                      type="string",
                      help="'random' to split randomly; 'scenario' to split " +
                      "by scenario.")
    parser.add_option(
        "--training_examples",
        dest="training_examples",
        action="append",
        help=
        "Examples that are in the training set; others go in the test set. Can be passed more than once. "
    )
    (options, args) = parser.parse_args()

    try:
        # These imports bring the feature extractor classes into scope so
        # that the eval() below can resolve the class named on the command
        # line.
        from g3.feature_extractor.esdc_features import EsdcFeatures
        from g3.feature_extractor.esdc_flattened_features import EsdcFlattenedFeatures
        from g3.feature_extractor.grounded_features import GGGFeatures
        from g3.feature_extractor.rl_features import RLFeatures
        from g3.feature_extractor.bolt_features import BoltFeatures
        from g3.feature_extractor.ikea_features import IkeaFeatures
        from g3.feature_extractor.sr_features import SrFeatures
        #feature_extractor = semantic_map.esdc_semantic_map2.esdc_semantic_map()
        # NOTE(review): eval() of a command-line argument — acceptable for a
        # trusted research tool, but never expose this to untrusted input.
        feature_extractor_cls = eval(options.feature_extractor)
        feature_extractor = feature_extractor_cls()
    except:
        # Report which extractor name failed, then re-raise the real error.
        print "error doing", options.feature_extractor
        raise

    observations = list()
    # Positive corpora: unlabeled groundings default to correct (True).
    for positive_fname in options.positive_fnames:
        corpus = annotationIo.load(positive_fname)
        new_examples = generate_examples(basename(positive_fname),
                                         corpus,
                                         feature_extractor,
                                         default_class_value=True)
        if len(new_examples) == 0:
            raise ValueError("No examples from" + ` positive_fname `)
        observations.extend(new_examples)
    # Negative corpora: unlabeled groundings default to incorrect (False).
    for negative_fname in options.negative_fnames:
        corpus = annotationIo.load(negative_fname)
        new_examples = generate_examples(basename(negative_fname),
                                         corpus,
                                         feature_extractor,
                                         default_class_value=False)
        if len(new_examples) == 0:
            raise ValueError("No examples from" + ` negative_fname `)
        observations.extend(new_examples)
    # Labeled corpora: keep only examples with an explicit label (no default).
    for labeled_fname in options.labeled_fnames:
        corpus = annotationIo.load(labeled_fname, check=False)
        new_examples = generate_examples(basename(labeled_fname),
                                         corpus,
                                         feature_extractor,
                                         default_class_value=None)
        if len(new_examples) == 0:
            raise ValueError("No examples from" + ` labeled_fname `)
        observations.extend(new_examples)
    # Unlabeled corpora: force the default (None) class onto every example.
    for unlabeled_fname in options.unlabeled_fnames:
        corpus = annotationIo.load(unlabeled_fname)
        new_examples = generate_examples(basename(unlabeled_fname),
                                         corpus,
                                         feature_extractor,
                                         default_class_value=None,
                                         force_default_class_value=True)
        if len(new_examples) == 0:
            raise ValueError("No examples from" + ` unlabeled_fname `)
        observations.extend(new_examples)

    if options.split == "scenario":
        # Group by scenario: 70% of scenario names (shuffled) go to training,
        # so no scenario appears in both sets.  assignmentId may carry a
        # "_suffix"; the split("_")[0] strips it back to the MTurk id.
        mturkCorpus = readCorpus.Corpus(
            "%s/data/corpusCommandsForVideoSmallFilesOnly/" % SLU_HOME)
        scenario_names = list(
            set(
                mturkCorpus.assignmentForId(
                    obs.annotation.assignmentId.split("_")[0]).scenario.name
                for obs in observations))
        random.shuffle(scenario_names)
        n_training_scenarios = int(ceil(len(scenario_names) * 0.7))
        training_scenarios = scenario_names[:n_training_scenarios]
        testing_scenarios = scenario_names[n_training_scenarios:]
        training = [
            o for o in observations if mturkCorpus.assignmentForId(
                o.annotation.assignmentId.split("_")[0]).scenario.name in
            training_scenarios
        ]
        testing = [
            o for o in observations if mturkCorpus.assignmentForId(
                o.annotation.assignmentId.split("_")[0]).scenario.name in
            testing_scenarios
        ]
    elif options.split == "annotation":
        '''
        Splits the examples, grouped by annotation.  If the spatial relations
        corpus is included, that data will be in the training set only.
        '''
        training = []
        testing = []
        sr_ids = []
        ids = []
        # Separate spatial-relations ("sr_") annotation ids from the rest;
        # sr examples are forced into training below.
        for o in observations:
            aid = o.annotation.id
            if ((aid not in ids) and ("sr_" not in aid)):
                ids.append(aid)
            elif "sr_" in aid:
                sr_ids.append(aid)
        random.shuffle(ids)
        n_training_ids = int(ceil(len(ids) * 0.7))
        training_ids = ids[:n_training_ids]
        testing_ids = ids[n_training_ids:]
        training = [
            o for o in observations if o.annotation.id in training_ids
            or o.annotation.assignmentId in sr_ids
        ]
        testing = [o for o in observations if o.annotation.id in testing_ids]
    elif options.split == "random":
        # Plain 70/30 shuffle over individual examples.
        random.shuffle(observations)
        n_training = int(ceil(len(observations) * 0.7))
        training = observations[0:n_training]
        testing = observations[n_training:]
    elif options.split == "labeled_annotation":
        # Training membership comes from previously saved example datasets;
        # match on annotation id, or on its "_"-stripped prefix as fallback.
        training_ids = set()
        training = []
        testing = []
        for training_fname in options.training_examples:
            ds = pickle_util.load(training_fname)
            for ex in ds.observations:
                training_ids.add(ex.annotation.id)
                training_ids.add(ex.annotation.id.split("_")[0])
        print "training", training_ids
        for example in observations:
            if example.annotation.id in training_ids:
                training.append(example)
            else:
                aid = example.annotation.id.split("_")[0]
                if aid in training_ids:
                    training.append(example)
                else:
                    print "skipping", example.annotation.id, aid
                    testing.append(example)
        print "labeled training", len(training)
        print "labeled testing", len(testing)
    elif options.split == "labeled_file":
        # Route by source filename substring; anything ambiguous defaults to
        # training.
        training = []
        testing = []
        for example in observations:
            if "training" in example.annotation.fname:
                training.append(example)
            elif "testing" in example.annotation.fname:
                testing.append(example)
            else:
                training.append(example)
    elif options.split == "labeled":
        # Like labeled_annotation, but matches on the example id itself.
        training_ids = set()
        training = []
        testing = []
        for training_fname in options.training_examples:
            ds = pickle_util.load(training_fname)
            for ex in ds.observations:
                print "id", ex.id
                training_ids.add(ex.id)
        for example in observations:
            print "example", example.id
            if example.id in training_ids:
                training.append(example)
            else:
                testing.append(example)
    else:
        raise ValueError("Unexpected split type: " + ` options.split `)

    training_dataset = ContinuousDataset(training, feature_extractor_cls)
    testing_dataset = ContinuousDataset(testing, feature_extractor_cls)
    print "saving ", len(training), " examples to:", options.training_fname
    pickle_util.save(options.training_fname, training_dataset)
    print "saving ", len(testing), " examples to:", options.testing_fname
    pickle_util.save(options.testing_fname, testing_dataset)
def __init__(self, mturkCorpus=None, stateType=None, ground_children=False):
    # Main annotation-GUI window: embeds a matplotlib canvas inside a Qt
    # window, builds the data models for the various tables/trees, and wires
    # up all signal/slot connections.  Statement order matters: setupUi must
    # run before any widget attribute is touched, and the table models must
    # exist before their selectionModel() signals are connected.
    #
    # mturkCorpus: optional path to an MTurk corpus directory; loaded into
    #   self.corpus when given, otherwise self.corpus stays None.
    # stateType: opaque state descriptor stored for later use — semantics
    #   not visible here; confirm against callers.
    # ground_children: flag stored on self; consumed elsewhere in the class.
    QMainWindow.__init__(self)
    if mturkCorpus != None:
        self.corpus = readCorpus.Corpus(mturkCorpus)
    else:
        self.corpus = None
    self.ground_children = ground_children
    self.artists = []           # matplotlib artists currently drawn
    self.state = None
    self.stateType = stateType
    self.artistsDict = {}
    self.pathNodes = {}
    self.currPath = []          # path nodes accumulated in path mode
    self.setupUi(self)
    # Re-parent a matplotlib figure canvas into the Qt layout, keeping the
    # old parent around, and attach the standard navigation toolbar.
    self.figure = mpl.figure()
    self.axes = self.figure.gca()
    self.axes.set_aspect("equal")
    self.oldParent = self.figure.canvas.parent()
    self.figure.canvas.setParent(self)
    self.matplotlibFrame.layout().addWidget(self.figure.canvas)
    self.toolbar = NavigationToolbar2QT(self.figure.canvas, self)
    self.addToolBar(self.toolbar)
    # self.limits = [0, 60, 20, 60]
    # Initial axis limits [xmin, xmax, ymin, ymax]; presumably lon/lat
    # bounds for the default map view — confirm against the data.
    self.limits = [-71.15, -71.05, 42.35, 42.41]
    self.entireText = None
    self.addPathButton.setEnabled(False)
    self.restoreLimits()
    # Companion 3D context viewer in its own window.
    self.contextWindow = context3d.MainWindow()
    self.contextWindow.show()
    self.connect(self.contextWindow.glWidget, SIGNAL("selectedGrounding()"),
                 self.selectedGrounding)
    self.connect(self.sourceEdit, SIGNAL("editingFinished()"),
                 self.updateSource)
    self.connect(self.shouldDrawAgentCheckBox, SIGNAL("stateChanged(int)"),
                 self.draw)
    self.connect(self.annotationFilter, SIGNAL("editingFinished()"),
                 self.filterAnnotations)
    # matplotlib-side events (canvas redraws and artist picking).
    self.figure.canvas.mpl_connect('draw_event', self.updateLimits)
    self.figure.canvas.mpl_connect('pick_event', self.onpick)
    # Data models backing the Qt views; must be created before the
    # selectionModel() connections below.
    self.esdcModel = esdcTreeModel.Model(self.esdcTreeView)
    self.annotationModel = annotationModel.Model(self.annotationTable)
    self.groundingsModel = groundingsModel.Model(self.groundingsTable)
    self.contextModel = groundingsModel.Model(self.contextTable)
    self.pathSegmentsModel = groundingsModel.Model(self.pathSegmentsTable)
    self.largePathSegmentsModel = groundingsModel.Model(
        self.largePathSegmentsTable)
    # Two segmenters: fine-grained uniform segments and coarser
    # pause-delimited segments.
    self.path_segmenter = uniform_segmenter.Segmenter()
    self.large_path_segmenter = pause_segmenter.Segmenter()
    print "connecting"
    self.connect(self.pathSegmentsTable.selectionModel(),
                 SIGNAL("selectionChanged (QItemSelection,QItemSelection)"),
                 self.draw)
    self.connect(self.largePathSegmentsTable.selectionModel(),
                 SIGNAL("selectionChanged (QItemSelection,QItemSelection)"),
                 self.selectLargePathSegments)
    self.connect(self.esdcTreeView.selectionModel(),
                 SIGNAL("selectionChanged (QItemSelection,QItemSelection)"),
                 self.selectEsdc)
    self.connect(self.annotationTable.selectionModel(),
                 SIGNAL("selectionChanged (QItemSelection,QItemSelection)"),
                 self.selectAnnotation)
    self.connect(self.contextTable.selectionModel(),
                 SIGNAL("selectionChanged (QItemSelection,QItemSelection)"),
                 self.selectContext)
    self.connect(self.groundingsTable.selectionModel(),
                 SIGNAL("selectionChanged (QItemSelection,QItemSelection)"),
                 self.selectGrounding)
    # Button actions.
    self.connect(self.addAgentPathSegmentsButton, SIGNAL("clicked()"),
                 self.addAgentPathSegments)
    self.connect(self.updateStateButton, SIGNAL("clicked()"),
                 self.updateState)
    self.connect(self.addGroundingButton, SIGNAL("clicked()"),
                 self.addGrounding)
    self.connect(self.clearGroundingsButton, SIGNAL("clicked()"),
                 self.clearGroundings)
    self.connect(self.addPathButton, SIGNAL("clicked()"), self.addPath)
    # Menu / keyboard actions.
    self.connect(self.actionNextEsdc, SIGNAL("triggered()"), self.nextEsdc)
    self.connect(self.actionClearGroundings, SIGNAL("triggered()"),
                 self.clearGroundings)
    self.connect(self.actionNextNan, SIGNAL("triggered()"),
                 self.selectNanAnnotation)
    self.connect(self.actionPreviousEsdc, SIGNAL("triggered()"),
                 self.previousEsdc)
    self.connect(self.actionNextCommand, SIGNAL("triggered()"),
                 self.nextCommand)
    self.connect(self.actionPreviousCommand, SIGNAL("triggered()"),
                 self.previousCommand)
    self.connect(self.actionSave, SIGNAL("triggered()"), self.save)
    self.connect(self.actionLoad, SIGNAL("triggered()"), self.load)
    self.connect(self.actionPath_Nodes, SIGNAL("triggered()"),
                 self.path_mode)
    self.connect(self.actionGroundings, SIGNAL("triggered()"),
                 self.groundings_mode)
    self.connect(self.classComboBox, SIGNAL("currentIndexChanged(int)"),
                 self.updateClass)
    self.connect(self.actionNextEmptyAnnotation, SIGNAL("triggered()"),
                 self.selectNextEmptyAnnotation)
    self.connect(self.actionGroundingIsCorrect, SIGNAL("triggered()"),
                 self.setGroundingIsCorrect)
    self.connect(self.actionGroundingIsNotCorrect, SIGNAL("triggered()"),
                 self.setGroundingIsNotCorrect)