def create_valtokcap(data_folder=DATA_FOLDER):
    """Tokenize COCO validation-file captions and write them to JSON.

    Loads the COCO validation caption annotations, partitions them into the
    project's own validation and test image-id sets, tokenizes every caption
    with the PTB tokenizer, and dumps two lists of
    ``(caption_id, {'caption': tokenized, 'image_id': id})`` pairs to
    ``<data_folder>/preprocessed/valtokcap.json`` and
    ``<data_folder>/preprocessed/testokcap.json``.

    Parameters
    ----------
    data_folder : str
        Root data directory; output files go in its ``preprocessed`` subdir.
    """
    import gc
    gc.collect()  # reclaim memory before loading the large annotation file

    vcap = COCO(COCO_VALID_CAP_FILE)
    valimgids, tesimgids = getValimgids(), getTesimgids()

    # Gather the raw caption annotations belonging to each split.
    valcap = []
    for img_id in valimgids:
        valcap += vcap.imgToAnns[img_id]
    tescap = []
    for img_id in tesimgids:
        tescap += vcap.imgToAnns[img_id]

    # PTBTokenizer expects {caption_id: [{'caption': ...}, ...]}.
    vallistedCapMap = {
        ann['id']: [{'caption': ann['caption'], 'image_id': ann['image_id']}]
        for ann in valcap
    }
    valtokenizedListedCapMap = PTBTokenizer().tokenize(vallistedCapMap)

    teslistedCapMap = {
        ann['id']: [{'caption': ann['caption'], 'image_id': ann['image_id']}]
        for ann in tescap
    }
    testokenizedListedCapMap = PTBTokenizer().tokenize(teslistedCapMap)

    # Map caption ids to a map of the tokenized caption and its image id.
    # NOTE: .items() replaces the Python-2-only .iteritems() (same pairs,
    # works on both interpreters).
    valtokcap = [
        (cid, {'caption': toks[0],
               'image_id': vallistedCapMap[cid][0]['image_id']})
        for cid, toks in valtokenizedListedCapMap.items()
    ]
    testokcap = [
        (cid, {'caption': toks[0],
               'image_id': teslistedCapMap[cid][0]['image_id']})
        for cid, toks in testokenizedListedCapMap.items()
    ]

    # Context managers guarantee the files are closed even if json.dump raises.
    with open(data_folder + '/preprocessed/valtokcap.json', 'w') as f:
        json.dump(valtokcap, f)
    with open(data_folder + '/preprocessed/testokcap.json', 'w') as f:
        json.dump(testokcap, f)
def create_valtokcap(data_folder=DATA_FOLDER):
    """Tokenize COCO validation-file captions and write them to JSON.

    NOTE(review): this is a byte-near duplicate of an identical
    ``create_valtokcap`` definition earlier in this file; only this later
    definition is bound at import time. Consider deleting one copy.

    Loads the COCO validation caption annotations, splits them into the
    project's validation and test image-id sets, PTB-tokenizes each caption,
    and dumps ``(caption_id, {'caption': tokenized, 'image_id': id})`` pair
    lists to ``<data_folder>/preprocessed/valtokcap.json`` and
    ``<data_folder>/preprocessed/testokcap.json``.

    Parameters
    ----------
    data_folder : str
        Root data directory; output files go in its ``preprocessed`` subdir.
    """
    import gc
    gc.collect()  # reclaim memory before loading the large annotation file

    vcap = COCO(COCO_VALID_CAP_FILE)
    valimgids, tesimgids = getValimgids(), getTesimgids()

    # Collect the raw caption annotations for each split.
    valcap = []
    for img_id in valimgids:
        valcap += vcap.imgToAnns[img_id]
    tescap = []
    for img_id in tesimgids:
        tescap += vcap.imgToAnns[img_id]

    # PTBTokenizer expects {caption_id: [{'caption': ...}, ...]}.
    vallistedCapMap = {
        ann['id']: [{'caption': ann['caption'], 'image_id': ann['image_id']}]
        for ann in valcap
    }
    valtokenizedListedCapMap = PTBTokenizer().tokenize(vallistedCapMap)

    teslistedCapMap = {
        ann['id']: [{'caption': ann['caption'], 'image_id': ann['image_id']}]
        for ann in tescap
    }
    testokenizedListedCapMap = PTBTokenizer().tokenize(teslistedCapMap)

    # Map caption ids to a map of the tokenized caption and its image id.
    # .items() replaces the Python-2-only .iteritems() (portable, same pairs).
    valtokcap = [
        (cid, {'caption': toks[0],
               'image_id': vallistedCapMap[cid][0]['image_id']})
        for cid, toks in valtokenizedListedCapMap.items()
    ]
    testokcap = [
        (cid, {'caption': toks[0],
               'image_id': teslistedCapMap[cid][0]['image_id']})
        for cid, toks in testokenizedListedCapMap.items()
    ]

    # Context managers close the files even if json.dump raises.
    with open(data_folder + '/preprocessed/valtokcap.json', 'w') as f:
        json.dump(valtokcap, f)
    with open(data_folder + '/preprocessed/testokcap.json', 'w') as f:
        json.dump(testokcap, f)
def create_tokcap(data_folder=DATA_FOLDER):
    """Tokenize all COCO training-file captions and write tokcap.json.

    Loads every caption annotation from the COCO training caption file,
    PTB-tokenizes them, and dumps a list of
    ``(caption_id, {'caption': tokenized, 'image_id': id})`` pairs to
    ``<data_folder>/preprocessed/tokcap.json``.

    Parameters
    ----------
    data_folder : str
        Root data directory; output file goes in its ``preprocessed`` subdir.
    """
    cap = COCO(COCO_TRAIN_CAP_FILE)

    # PTBTokenizer expects {caption_id: [{'caption': ...}, ...]}.
    listedCapMap = {
        ann['id']: [{'caption': ann['caption'], 'image_id': ann['image_id']}]
        for ann in cap.loadAnns(cap.getAnnIds())
    }
    tokenizedListedCapMap = PTBTokenizer().tokenize(listedCapMap)

    # Map caption ids to a map of the tokenized caption and its image id.
    # .items() replaces the Python-2-only .iteritems() (portable, same pairs).
    tokcap = [
        (cid, {'caption': toks[0],
               'image_id': listedCapMap[cid][0]['image_id']})
        for cid, toks in tokenizedListedCapMap.items()
    ]

    # Context manager closes the file even if json.dump raises.
    with open(data_folder + '/preprocessed/tokcap.json', 'w') as f:
        json.dump(tokcap, f)
def create_tokcap(data_folder=DATA_FOLDER):
    """Tokenize all COCO training-file captions and write tokcap.json.

    NOTE(review): this is a byte-near duplicate of an identical
    ``create_tokcap`` definition earlier in this file; only this later
    definition is bound at import time. Consider deleting one copy.

    Loads every caption annotation from the COCO training caption file,
    PTB-tokenizes them, and dumps a list of
    ``(caption_id, {'caption': tokenized, 'image_id': id})`` pairs to
    ``<data_folder>/preprocessed/tokcap.json``.

    Parameters
    ----------
    data_folder : str
        Root data directory; output file goes in its ``preprocessed`` subdir.
    """
    cap = COCO(COCO_TRAIN_CAP_FILE)

    # PTBTokenizer expects {caption_id: [{'caption': ...}, ...]}.
    listedCapMap = {
        ann['id']: [{'caption': ann['caption'], 'image_id': ann['image_id']}]
        for ann in cap.loadAnns(cap.getAnnIds())
    }
    tokenizedListedCapMap = PTBTokenizer().tokenize(listedCapMap)

    # Map caption ids to a map of the tokenized caption and its image id.
    # .items() replaces the Python-2-only .iteritems() (portable, same pairs).
    tokcap = [
        (cid, {'caption': toks[0],
               'image_id': listedCapMap[cid][0]['image_id']})
        for cid, toks in tokenizedListedCapMap.items()
    ]

    # Context manager closes the file even if json.dump raises.
    with open(data_folder + '/preprocessed/tokcap.json', 'w') as f:
        json.dump(tokcap, f)