def split_file(path, folders):
    """
    Create the set of splits for a single file.

    Parameters:
    -----------
    path: string
        path to the input Decompressed file
    folders: dict
        dictionary containing the id of the split as key and the path to the
        folder where the split is saved as value.
    """
    filerels = fh.DecompressedFile(path)
    idx2rels = {}
    with filerels as frels:
        fname_in = frels.filename
        for idfr, arr in frels.iterate_frames():
            idx2rels[idfr] = arr

    for split in sorted(folders):
        # number of observations corresponding to `split` percent of the frames
        perc = int(len(idx2rels) * split / 100)
        logger.info('Creating split with {}%: {} of {} observations'.format(split, perc, len(idx2rels)))
        indexes = list(idx2rels.keys())
        sampling = random.choices(indexes, k=perc)
        fileoutput = join(folders[split], fname_in)
        save_file(fileoutput, sampling, idx2rels)
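# A minimal usage sketch (not from the original source; the paths and the
# percentage keys in `folders` below are hypothetical):
#     folders = {25: '/tmp/splits/25', 50: '/tmp/splits/50'}
#     split_file('/tmp/video1_relations.txt', folders)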
def compress_relations(file_input, output=None, file_types='types.pddl',
                       class_file='classes.cfg', rels_file='relations.cfg',
                       keep_names=False):
    if not output:
        fname, _ = splitext(basename(file_input))
        output = join(dirname(file_input), fname + '_predicates.txt')

    # Load classes for objects from dict {0: 'rel0', 1: 'rel1'}
    do = fh.ConfigFile(class_file).load_classes(cnames=True)
    logger.info('Loaded dictionary with {} objects.'.format(len(do)))
    dr = fh.ConfigFile(rels_file).load_classes(cnames=True)
    logger.info('Loaded dictionary with {} relations.'.format(len(dr)))
    dp = fh.PddlTypes(file_types)

    df = fh.DecompressedFile(file_input)
    rels = df.list_relations()
    relations = []
    for s, r, o in rels:
        s = s.replace('_', '-')
        o = o.replace('_', '-')
        relations.append((r, dp[s], dp[o]))
        relations.append((r, (s, dp[s][1]), (o, dp[o][1])))

    with open(output, 'w') as fout:
        fout.write('(:predicates\n')
        for r, s, o in sorted(set(relations)):
            # (on ?f - food ?o - object)
            fout.write(' (%s ?%s - %s ?%s - %s)\n' % (r, s[1], s[0], o[1], o[0]))
        fout.write(')')
    logger.info('File saved at: %s' % output)
def sanitize_relations(file_relations, config_file):
    """
    Check whether a file containing relations conforms to the possible
    relations described in the `config_file` file.
    """
    logger.info('Checking file: {}'.format(file_relations))
    verify_sequence_frames(file_relations)
    drels = load_possible_relations(config_file)

    # Load groups of relations for frame
    errors = 0
    recorded = {}
    frls = fh.DecompressedFile(file_relations)
    with frls as frels:
        for arr in frels:
            fr, o1, r, o2 = arr[0], arr[1], arr[2], arr[3]
            pathimg = str(fr) + '.jpg'
            if (o1, r, o2) not in drels and (o1, r, o2) not in recorded:
                logger.error('Not a possible relation: [{}, {}, {}]'.format(o1, r, o2))
                recorded[(o1, r, o2)] = ''
                errors += 1
    if errors:
        logger.info('Finished WITH {} errors!'.format(errors))
    else:
        logger.info('Finished without errors!')
def show_relations(file_input, output=None, class_file='classes.cfg',
                   rels_file='relations.cfg', keep_names=False):
    if not output:
        fname, _ = splitext(basename(file_input))
        output = join(dirname(file_input), fname + '_list.txt')

    # Load classes for objects from dict {0: 'rel0', 1: 'rel1'}
    do = fh.ConfigFile(class_file).load_classes(cnames=True)
    logger.info('Loaded dictionary with {} objects.'.format(len(do)))
    dr = fh.ConfigFile(rels_file).load_classes(cnames=True)
    logger.info('Loaded dictionary with {} relations.'.format(len(dr)))

    # Check whether the file contains names or ids
    with open(file_input) as fin:
        for line in fin:
            if line[0].isdigit():
                break
    arr = line.strip().split('-')
    if len(arr) == 5:
        # 0-15-o1-r-o2
        handler = fh.CompressedFile(file_input)
    else:
        # 0-o1,r,o2
        handler = fh.DecompressedFile(file_input)

    list_rels = handler.list_relations(as_set=True)
    with open(output, 'w') as fout:
        for o1, r, o2 in sorted(list_rels):
            fout.write('{} {} {}\n'.format(o1, r, o2))
    logger.info('File saved at: %s' % output)
def load_relations(file_relations, do, dr, dmap=None, home=None):
    """
    Return a dictionary in the form
    drels[path_img] = [(idsub1, idrel1, idobj1), (idsub2, idrel2, idobj2), ...]

    Parameters:
    -----------
    file_relations: string
        path to the Decompressed file containing relations
    do: dict
        dictionary in the form {'object_1': idobj1, 'object_2': idobj2, ...}
    dr: dict
        dictionary in the form {'relation1': idrel1, 'relation2': idrel2, ...}
    dmap: dict
        dictionary with maps from the KSCGR to the VOC dataset
        {'kscgr_path1': voc_path1, ...}
    home: string
        path to the files in the server, such as '/usr/share/datasets/VOC/'
    """
    dic_rels = defaultdict(list)  # relations for each image
    logger.info('Loading information from file: {}'.format(file_relations))
    filerls = fh.DecompressedFile(file_relations)
    pb = pbar.ProgressBar(filerls.nb_lines())
    with filerls as frels:
        for fr, o1, r, o2, path in frels:
            #print fr, o1, r, o2, path
            idsub = do[o1]
            idrel = dr[r]
            idobj = do[o2]
            pathimg = join(path, str(fr) + '.jpg')
            if dmap:
                pathimg = dmap[join(home, pathimg)]
            dic_rels[pathimg].append((idsub, idrel, idobj))
            pb.update()
    return dic_rels
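# A minimal usage sketch (not from the original source; the file name and the
# `do`/`dr` mappings below are hypothetical examples of the documented format):
#     do = {'person': 0, 'bowl': 1}
#     dr = {'holding': 0}
#     drels = load_relations('relations.txt', do, dr)
#     # drels['path/to/0.jpg'] -> [(0, 0, 1), ...]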
def _extract_relations(self, folder_input):
    """ Extract relations (sub, rel, obj) from multiple files. """
    relations = []
    logger.info('Generating dictionary...')
    relfiles = fh.FolderHandler(folder_input)
    for file_input in relfiles:
        with fh.DecompressedFile(file_input) as cf:
            relations.extend(cf.list_relations())
    return relations
def cohen_kappa_relations(fanno_1, fanno_2):
    dic = {}
    annotator1 = []
    annotator2 = []
    fd1 = fh.DecompressedFile(fanno_1)
    fd2 = fh.DecompressedFile(fanno_2)
    for arr1, arr2 in zip(fd1.iterate_frames(), fd2.iterate_frames()):
        idf1, vec1 = arr1
        idf2, vec2 = arr2
        convert_list(dic, vec1)
        convert_list(dic, vec2)
        if idf1 != idf2:
            logger.error('Files do not contain the same sequence of frames: {}/{}'.format(idf1, idf2))
            sys.exit()
        v1, v2 = align_lists(vec1, vec2)
        annotator1.extend(v1)
        annotator2.extend(v2)
    kappa = cohen_kappa_score(annotator1, annotator2)
    print(kappa)
    print('Finished')
def main(inputfile, output=None, class_file='classes.cfg', rels_file='relations.cfg'):
    """ Create a `so_prior.pkl` file containing the relationship between objects. """
    if not output:
        output = join(dirname(inputfile), 'so_prior.pkl')

    # Load classes for objects from dict {0: 'rel0', 1: 'rel1'}
    # DO NOT LOAD `__background__`, thus id_person=0
    do = fh.ConfigFile(class_file, background=False).load_classes(cnames=True)
    logger.info('Loaded dictionary with {} objects.'.format(len(do)))
    dr = fh.ConfigFile(rels_file).load_classes(cnames=True)
    logger.info('Loaded dictionary with {} relations.'.format(len(dr)))

    so_prior = np.zeros((len(do), len(do), len(dr)), dtype='float64')
    objsub = np.zeros((len(do), len(do)), dtype='float64')
    logger.info('Matrix of objects and relations with shape: {}'.format(so_prior.shape))
    logger.info('Matrix of only objects with shape: {}'.format(objsub.shape))

    filerels = fh.DecompressedFile(inputfile)
    logger.info('Loading information from file: {}'.format(inputfile))
    nb_lines = filerels.nb_lines()
    pb = pbar.ProgressBar(nb_lines)
    logger.info('Processing {} lines...'.format(nb_lines))
    with filerels as frels:
        for arr in frels:
            fr, o1, r, o2 = arr[0], arr[1], arr[2], arr[3]
            idsub = do[o1]
            idrel = dr[r]
            idobj = do[o2]
            # count (subject, object, relation) triples and (subject, object) pairs
            so_prior[idsub][idobj][idrel] += 1
            objsub[idsub][idobj] += 1
            pb.update()
    print

    # normalize counts per relation, avoiding division by zero
    for i in range(so_prior.shape[2]):
        so_prior[:, :, i] = np.divide(so_prior[:, :, i], objsub,
                                      out=np.zeros_like(so_prior[:, :, i]),
                                      where=objsub != 0)

    fout = open(output, 'wb')
    cPickle.dump(so_prior, fout)
    fout.close()
    logger.info('Saved content in file: {}'.format(output))
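# A minimal usage sketch (not from the original source; the input path is
# hypothetical). After the normalization above, so_prior[s, o, r] holds the
# fraction of (s, o) co-occurrences labelled with relation r:
#     main('annotations/merged_relations.txt', output='annotations/so_prior.pkl')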
def goal_state_from_file(fileinput, output=None):
    """
    Extract the goal state for a single file. In case `output` is passed,
    the goal state is saved into a file.

    Parameters:
    -----------
    fileinput: string
        path to the DecompressedFile containing relations
    output: string (optional)
        path to the output file
    """
    fd = fh.DecompressedFile(fileinput)
    rels = fd.relations_at_frame(fd.nb_frames() - 1)
    rels = check_group(rels)
    if output:
        save_goal(rels)
    return rels
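# A minimal usage sketch (not from the original source; the path is hypothetical):
#     goal_rels = goal_state_from_file('annotations/video1_relations.txt')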
def preconditions_effects_pair(file_input, srep, dprec):
    last_state = []
    current_state = []
    logger.info('Processing file: {}'.format(file_input))
    config = fh.PDDLInit()
    dinit = config.dic_initial_states()
    last_state = State(srep, dinit)
    with fh.DecompressedFile(file_input) as cf:
        for idfr, relations in cf.iterate_frames():
            #print idfr, State(srep, relations).state
            current_state = State(srep, relations)
            if current_state != last_state:
                effect = last_state.XORe(current_state)
                nb = len(dprec)
                dprec[nb] = (last_state, effect)
            last_state = current_state
    return dprec
def preconditions_effects(file_input, srep, dprec, dinit):
    """ Generate the preconditions and effects of each action """
    last_state = []
    current_state = []
    logger.info('Processing file: {}'.format(file_input))
    last_state = State(srep, dinit)
    with fh.DecompressedFile(file_input) as cf:
        for idfr, relations in cf.iterate_frames():
            #print idfr, State(srep, relations).state
            current_state = State(srep, relations)
            if current_state != last_state:
                effect = last_state.XORe(current_state)
                if effect in dprec:
                    dprec[effect].append(last_state)
                else:
                    dprec[effect] = [last_state]
            last_state = current_state
    return dprec
def preconditions_effects_solo(file_input, srep, dprec):
    last_state = []
    current_state = []
    logger.info('Processing file: {}'.format(file_input))
    nb = 0
    with fh.DecompressedFile(file_input) as cf:
        for idfr, relations in cf.iterate_frames():
            #print idfr, State(srep, relations).state
            if idfr == 0:
                last_state = State(srep, relations)
            else:
                current_state = State(srep, relations)
                if current_state != last_state:
                    effect = last_state.XORe(current_state)
                    if effect in dprec:
                        dprec[effect].append(last_state)
                    else:
                        dprec[effect] = [last_state]
                    nb += 1
                last_state = current_state
    return dprec
def compress_relations(file_input, output=None, class_file='classes.cfg',
                       rels_file='relations.cfg', keep_names=False):
    if not output:
        fname, _ = splitext(basename(file_input))
        output = join(dirname(file_input), fname + '_compressed.txt')

    # Load classes for objects from dict {0: 'rel0', 1: 'rel1'}
    do = fh.ConfigFile(class_file).load_classes(cnames=True)
    logger.info('Loaded dictionary with {} objects.'.format(len(do)))
    dr = fh.ConfigFile(rels_file).load_classes(cnames=True)
    logger.info('Loaded dictionary with {} relations.'.format(len(dr)))

    df = fh.DecompressedFile(file_input)
    dcomp = df.group_relations()
    logger.info('Found {} relations spread over {} lines of the input file.'.format(
        len(df.start_frames), df.nb_line - 1))
    logger.info('Compressed to {} lines in output file.'.format(len(df.start_frames)))

    logger.info('Saving output file...')
    with open(output, 'w') as fout:
        if keep_names:
            fout.write('Initial_frame-Final_frame Subject Relation Object\n')
        else:
            fout.write('Initial_frame-Final_frame-Subject-Relation-Object\n')
        for _, key in sorted(df.start_frames):
            start, end = dcomp[key]['contiguous'].pop(0)
            subj, rel, obj = key
            if keep_names:
                fout.write('%d-%d %s %s %s\n' % (start, end, subj, rel, obj))
            else:
                fout.write('%d-%d-%d-%d-%d\n' % (start, end, do[subj], dr[rel], do[obj]))
    logger.info('File saved at: %s' % output)
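# Example of the compact output produced above (the ids below are hypothetical):
#     Initial_frame-Final_frame-Subject-Relation-Object
#     0-15-3-1-7
# With `keep_names=True` the same interval is written with names instead of ids,
# e.g. `0-15 person holding knife`.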
def sanitize_for_bounding_boxes(file_objects, file_relations, output=None):
    """
    Check whether a file of relations conforms to the bounding boxes
    described in the `file_objects` file.
    """
    if not output:
        fname, ext = splitext(basename(file_relations))
        output = join(dirname(file_relations), fname + '_sanity' + ext)
    verify_sequence_frames(file_objects)
    verify_sequence_frames(file_relations)

    # Load groups of relations for frame
    drels = defaultdict(list)
    frls = fh.DecompressedFile(file_relations)
    with frls as frels:
        for arr in frels:
            fr, o1, r, o2 = arr[0], arr[1], arr[2], arr[3]
            pathimg = str(fr) + '.jpg'
            drels[pathimg].append((fr, o1, r, o2))
    logger.info('Loaded relations for {} frames.'.format(len(drels)))

    errors = 0
    filelis = fh.LisFile(file_objects)
    with open(output, 'w') as fout, filelis as flis:
        fout.write('Frame\tSubject\tRelation\tObject\tPath: {}\n'.format(frls.path))
        for pathimg, arr in flis.iterate_frames():
            objects = [bbox[0] for bbox in arr]
            relations = drels[pathimg]
            for fr, o1, r, o2 in relations:
                if check_error(objects, o1, pathimg) and check_error(objects, o2, pathimg):
                    fout.write('{}\t{}\t{}\t{}\n'.format(fr, o1, r, o2))
                else:
                    errors += 1
    if errors:
        logger.info('Finished WITH {} errors!'.format(errors))
    else:
        os.remove(output)
        logger.info('Finished without errors!')
def merge_annotation(folder_input, output=None, class_file='classes.cfg', rels_file='relations.cfg'):
    if not output:
        output = join(folder_input, 'merged_relations.txt')

    # Load classes for objects from dict {0: 'rel0', 1: 'rel1'}
    do = fh.ConfigFile(class_file).load_classes(cnames=True)
    logger.info('Loaded dictionary with {} objects.'.format(len(do)))
    dr = fh.ConfigFile(rels_file).load_classes(cnames=True)
    logger.info('Loaded dictionary with {} relations.'.format(len(dr)))

    files = fh.FolderHandler(folder_input)
    with open(output, 'w') as fout:
        fout.write('Frame\tSubject\tRelation\tObject\tPath\n')
        for path in files:
            logger.info('Processing file: %s' % path)
            filerels = fh.DecompressedFile(path)
            with filerels as frels:
                for fr, o1, r, o2 in frels:
                    check_error(do, o1, frels.nb_lines)
                    check_error(do, o2, frels.nb_lines)
                    check_error(dr, r, frels.nb_lines)
                    fout.write('%d\t%s\t%s\t%s\t%s\n' % (fr, o1, r, o2, filerels.path))
    logger.info('Saved relations in file: %s' % output)
def main(fileobj, filerel, output=None, class_file='classes.cfg',
         rels_file='relations.cfg', map_paths='map_paths.txt'):
    """
    Create a `train.pkl` or `test.pkl` file containing the relationship between objects.

    TODO: Implement relations for two objects of the same class in the same image
    """
    if not output:
        output = join(dirname(fileobj), 'train.pkl')
    fdicobj = join(dirname(output), 'obj.txt')
    fdicrel = join(dirname(output), 'rel.txt')

    if map_paths:
        fmap = fh.MapFile(map_paths)
        dmap = fmap.load_dictionary(key='kscgr')
        logger.info('Loaded map file containing {} entries.'.format(len(dmap)))
        home = fmap.path

    # Load classes for objects from dict {0: 'rel0', 1: 'rel1'}
    # DO NOT LOAD `__background__`. Thus, id_person=0
    do = fh.ConfigFile(class_file, background=False).load_classes(cnames=True)
    logger.info('Loaded dictionary with {} objects.'.format(len(do)))
    dr = fh.ConfigFile(rels_file).load_classes(cnames=True)
    logger.info('Loaded dictionary with {} relations.'.format(len(dr)))

    dic_rels = defaultdict(list)  # relations for each image
    logger.info('Loading information from file: {}'.format(filerel))
    filerls = fh.DecompressedFile(filerel)
    pb = pbar.ProgressBar(filerls.nb_lines())
    with filerls as frels:
        for fr, o1, r, o2, path in frels:
            idsub = do[o1]
            idrel = dr[r]
            idobj = do[o2]
            pathimg = join(path, str(fr) + '.jpg')
            if map_paths:
                pathimg = dmap[join(home, pathimg)]
            dic_rels[pathimg].append((idsub, idrel, idobj))
            pb.update()
    print

    info = []
    # Load objects
    logger.info('Loading information from file: {}'.format(fileobj))
    flis = fh.LisFile(fileobj)
    nb_frames = filerls.nb_frames()
    pb = pbar.ProgressBar(nb_frames)
    logger.info('Processing {} frames.'.format(nb_frames))
    with flis as fin:
        for imgname, arr in flis.iterate_frames():
            filepath = dmap[join(home, imgname)]
            classes, boxes = [], []
            vsub, vobj, vrel = [], [], []
            dor = {}
            for i in range(len(arr)):
                obj, x, y, w, h = arr[i]
                iobj = do[obj]
                dor[iobj] = i
                classes.append(iobj)
                boxes.append([x, y, x + w, y + h])  # [xmin, ymin, xmax, ymax]
            for idsub, idrel, idobj in dic_rels[filepath]:
                vsub.append(dor[idsub])
                vobj.append(dor[idobj])
                vrel.append([idrel])
            info.append({
                'img_path': filepath,
                'classes': np.array(classes),
                'boxes': np.array(boxes),
                'ix1': np.array(vsub),
                'ix2': np.array(vobj),
                'rel_classes': vrel
            })
            pb.update()

    logger.info('Saving pickle file...')
    fout = open(output, 'wb')
    cPickle.dump(info, fout)
    fout.close()
    logger.info('Saved content in file: {}'.format(output))
    save_dictionary(fdicobj, do)
    save_dictionary(fdicrel, dr)
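# A minimal usage sketch (not from the original source; file names are hypothetical):
#     main('annotations/objects.txt', 'annotations/relations.txt',
#          output='annotations/train.pkl', map_paths='map_paths.txt')
# Each entry of the resulting pickle is a dict with keys 'img_path', 'classes',
# 'boxes', 'ix1', 'ix2' and 'rel_classes', as built in the loop above.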