Code example #1
0
def create_valtokcap(data_folder=DATA_FOLDER):
    """Tokenize the COCO validation-file captions and dump them to JSON.

    Loads the COCO validation caption annotations, splits them into the
    project's validation and test image-id sets, runs each caption through
    PTBTokenizer, and writes two files under ``<data_folder>/preprocessed/``:
    ``valtokcap.json`` and ``testokcap.json``.  Each file contains a list of
    ``(caption_id, {'caption': <tokenized text>, 'image_id': <image id>})``
    pairs.

    Args:
        data_folder: Root folder containing the ``preprocessed`` directory.
    """
    import gc
    gc.collect()  # reclaim memory before loading the large COCO annotation file

    vcap = COCO(COCO_VALID_CAP_FILE)
    valimgids, tesimgids = getValimgids(), getTesimgids()

    def _tokenize_split(img_ids):
        # Collect every caption annotation belonging to the given image ids.
        anns = []
        for img_id in img_ids:
            anns += vcap.imgToAnns[img_id]
        # PTBTokenizer expects {caption_id: [{'caption': ..., 'image_id': ...}]}.
        cap_map = {
            ann['id']: [{'caption': ann['caption'], 'image_id': ann['image_id']}]
            for ann in anns
        }
        tokenized = PTBTokenizer().tokenize(cap_map)
        # Pair each caption id with its tokenized text and source image id.
        # .items() (not the Python-2-only .iteritems()) works on both 2 and 3.
        return [
            (cid, {'caption': toks[0],
                   'image_id': cap_map[cid][0]['image_id']})
            for cid, toks in tokenized.items()
        ]

    valtokcap = _tokenize_split(valimgids)
    testokcap = _tokenize_split(tesimgids)

    # 'with' guarantees the files are closed even if json.dump raises.
    with open(data_folder + '/preprocessed/valtokcap.json', 'w') as f:
        json.dump(valtokcap, f)
    with open(data_folder + '/preprocessed/testokcap.json', 'w') as f:
        json.dump(testokcap, f)
Code example #2
0
def create_valtokcap(data_folder=DATA_FOLDER):
    """Tokenize the COCO validation-file captions and dump them to JSON.

    Splits the COCO validation caption annotations into the project's
    validation and test image-id sets, tokenizes every caption with
    PTBTokenizer, and writes ``valtokcap.json`` and ``testokcap.json`` under
    ``<data_folder>/preprocessed/``.  Each file holds a list of
    ``(caption_id, {'caption': <tokenized text>, 'image_id': <image id>})``
    pairs.

    Args:
        data_folder: Root folder containing the ``preprocessed`` directory.
    """
    import gc
    gc.collect()  # reclaim memory before loading the large COCO annotation file

    vcap = COCO(COCO_VALID_CAP_FILE)
    valimgids, tesimgids = getValimgids(), getTesimgids()

    def _split_tokcap(img_ids):
        # PTBTokenizer expects {caption_id: [{'caption': ..., 'image_id': ...}]}.
        cap_map = {}
        for img_id in img_ids:
            for ann in vcap.imgToAnns[img_id]:
                cap_map[ann['id']] = [
                    {'caption': ann['caption'], 'image_id': ann['image_id']}
                ]
        tokenized = PTBTokenizer().tokenize(cap_map)
        # (caption_id, {tokenized caption, source image id}) pairs.
        # .items() replaces the Python-2-only .iteritems().
        pairs = []
        for cid, toks in tokenized.items():
            pairs.append((cid, {'caption': toks[0],
                                'image_id': cap_map[cid][0]['image_id']}))
        return pairs

    valtokcap = _split_tokcap(valimgids)
    testokcap = _split_tokcap(tesimgids)

    # Context managers close the files even if json.dump raises.
    with open(data_folder + '/preprocessed/valtokcap.json', 'w') as f:
        json.dump(valtokcap, f)
    with open(data_folder + '/preprocessed/testokcap.json', 'w') as f:
        json.dump(testokcap, f)
Code example #3
0
def create_tokcap(data_folder=DATA_FOLDER):
    """Tokenize the COCO training captions and write them to JSON.

    Loads every caption annotation from the COCO training caption file,
    tokenizes it with PTBTokenizer, and writes
    ``<data_folder>/preprocessed/tokcap.json`` containing a list of
    ``(caption_id, {'caption': <tokenized text>, 'image_id': <image id>})``
    pairs.

    Args:
        data_folder: Root folder containing the ``preprocessed`` directory.
    """
    cap = COCO(COCO_TRAIN_CAP_FILE)

    # PTBTokenizer expects {caption_id: [{'caption': ..., 'image_id': ...}]}.
    listed_cap_map = {
        ann['id']: [{'caption': ann['caption'], 'image_id': ann['image_id']}]
        for ann in cap.loadAnns(cap.getAnnIds())
    }
    tokenized = PTBTokenizer().tokenize(listed_cap_map)

    # Pair each caption id with its tokenized text and source image id.
    # .items() (not the Python-2-only .iteritems()) works on both 2 and 3.
    tokcap = [
        (cid, {'caption': toks[0],
               'image_id': listed_cap_map[cid][0]['image_id']})
        for cid, toks in tokenized.items()
    ]

    # 'with' guarantees the file is closed even if json.dump raises.
    with open(data_folder + '/preprocessed/tokcap.json', 'w') as f:
        json.dump(tokcap, f)
Code example #4
0
def create_tokcap(data_folder=DATA_FOLDER):
    """Tokenize the COCO training captions and write them to JSON.

    Reads all caption annotations from the COCO training caption file,
    tokenizes each caption with PTBTokenizer, and dumps
    ``<data_folder>/preprocessed/tokcap.json``: a list of
    ``(caption_id, {'caption': <tokenized text>, 'image_id': <image id>})``
    pairs.

    Args:
        data_folder: Root folder containing the ``preprocessed`` directory.
    """
    cap = COCO(COCO_TRAIN_CAP_FILE)

    # PTBTokenizer expects {caption_id: [{'caption': ..., 'image_id': ...}]}.
    listed_cap_map = {}
    for ann in cap.loadAnns(cap.getAnnIds()):
        listed_cap_map[ann['id']] = [
            {'caption': ann['caption'], 'image_id': ann['image_id']}
        ]
    tokenized = PTBTokenizer().tokenize(listed_cap_map)

    # (caption_id, {tokenized caption, source image id}) pairs.
    # .items() replaces the Python-2-only .iteritems().
    tokcap = []
    for cid, toks in tokenized.items():
        tokcap.append((cid, {'caption': toks[0],
                             'image_id': listed_cap_map[cid][0]['image_id']}))

    # Context manager closes the file even if json.dump raises.
    with open(data_folder + '/preprocessed/tokcap.json', 'w') as f:
        json.dump(tokcap, f)