Example #1
def add_object_data(objects, image_id, all_nodes, all_edges, wn2label,
                    wn2image):
    obj2names = {}
    image_node = create_uri(VG_NS, 'I' + image_id)
    #    image_metadata={'image_ids': [image_id]}
    image_metadata = {}
    for o in objects:
        names = []
        for name in o['names']:
            name = name.strip()
            if not name: continue
            o_id = create_uri(VG_NS, name.replace(' ', '_'))

            o_pos = ''
            if 'attributes' in o.keys():
                for attr in o['attributes']:
                    a_id = create_uri(VG_NS, attr.replace(' ', '_'))
                    a_pos = ''
                    if attr in attr_synsets:
                        a_synset = attr_synsets[attr]
                        attr_wn_edge = [
                            a_id, WORDNET_SENSE_REL,
                            create_uri(WORDNET_NS, a_synset), data_source,
                            weight, image_metadata
                        ]
                        all_edges.append(attr_wn_edge)

                        # save wordnet data for an attribute
                        wn2label[a_synset].add(attr)
                        wn2image[a_synset].add(image_id)
                        a_pos = a_synset.split('.')[-2]
                        o_pos = a_pos

                    # attribute node
                    a_label, a_aliases = add_lowercase_labels([attr])
                    attr_node = [
                        a_id, a_label, ','.join(a_aliases), a_pos, data_source,
                        image_metadata
                    ]
                    all_nodes.append(attr_node)

                    # edge from object to an attribute
                    obj_attr_edge = [
                        o_id, RELATEDTO_REL, a_id, data_source, weight,
                        image_metadata
                    ]
                    all_edges.append(obj_attr_edge)

            obj_node = [o_id, name, '', o_pos, data_source, image_metadata]
            all_nodes.append(obj_node)
            names.append(name)

        # NOTE: o_id here is the URI built from the object's last processed name;
        # an object with no non-empty names would leave o_id undefined.
        obj_img_edge = [
            o_id, INIMAGE_REL, image_node, data_source, weight, image_metadata
        ]
        all_edges.append(obj_img_edge)

        obj2names[str(o['object_id'])] = names

    return all_nodes, all_edges, wn2label, wn2image, obj2names
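A note on the accumulators: the helper indexes attribute synsets via wn2label[a_synset].add(attr), so wn2label and wn2image must map keys to sets. A minimal setup sketch (assumed, not shown in the original example):

# Assumed setup: defaultdict(set) gives every new synset key an empty set.
from collections import defaultdict

wn2label = defaultdict(set)  # synset -> surface labels seen for it
wn2image = defaultdict(set)  # synset -> image ids it appears in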
Example #2
def add_utensil():
    """ Ajouter un ustensile """
    store_file = STORE['utensils']
    label = request.json['label']
    actions = [create_uri(uri) for uri in request.json['actions']]

    uri = create_uri(BASE_URI_UTENSIL + sanitize(label))

    load_rdf_file(store_file)
    Utensil(resUri=uri, label=label, actions=actions)
    save_rdf_file(store_file)

    return jsonify({'uri': uri})
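Since the handler uses request.json and jsonify, it is a Flask view function. A minimal wiring sketch, assuming a Flask app object (the route path below is hypothetical and not part of the original example):

from flask import Flask

app = Flask(__name__)

# Hypothetical registration; the original example does not show the route.
app.add_url_rule('/utensils', view_func=add_utensil, methods=['POST'])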
Example #3
def add_relationships_data(rels, image_id, all_nodes, all_edges, wn2label,
                           wn2image):
    for rel in rels:
        rel_id = create_uri(VG_NS, 'R' + str(rel['relationship_id']))
        sub_id = create_uri(VG_NS, 'O' + str(rel['subject_id']))
        obj_id = create_uri(VG_NS, 'O' + str(rel['object_id']))
        synsets = rel['synsets']
        pred = rel['predicate']

        # CREATE REL-SUBJECT and REL-OBJECT EDGES
        rel_subj_col = [
            rel_id, SUBJECT_REL, sub_id, data_source, weight, {
                'image_id': image_id
            }
        ]
        all_edges.append(rel_subj_col)

        rel_obj_col = [
            rel_id, OBJECT_REL, obj_id, data_source, weight, {
                'image_id': image_id
            }
        ]
        all_edges.append(rel_obj_col)

        pos = ''
        for s in synsets:
            rel_syn_col = [
                rel_id, WORDNET_SENSE_REL,
                create_uri(WORDNET_NS, s), data_source, weight, {
                    'image_id': image_id
                }
            ]
            all_edges.append(rel_syn_col)

            wn2label[s].add(pred)
            wn2image[s].add(image_id)

            pos = s.split('.')[-2]

            # all_rel_synsets is a module-level list defined outside this snippet
            all_rel_synsets.append(s)

        # CREATE relationship node
        label, aliases = add_lowercase_labels([pred])
        rel_node = [
            rel_id, label, ','.join(aliases), pos, data_source, {
                'image_id': image_id
            }
        ]
        all_nodes.append(rel_node)

    return all_nodes, all_edges, wn2label, wn2image
Example #4
def add_attr_data(attrs, image_id, all_nodes, all_edges, wn2label, wn2image,
                  attr_id):
    for a in attrs:
        a_id = create_uri(VG_NS, 'A' + str(attr_id))
        attr_id += 1

        # attribute-related edges
        # NOTE: obj_id (the URI of the object these attributes describe) is not
        # defined in this function; it must come from the enclosing scope.
        obj_attr_edge = [
            obj_id, HAS_PROPERTY_REL, a_id, data_source, weight, {
                'image_id': image_id
            }
        ]
        all_edges.append(obj_attr_edge)

        a_pos = ''
        if a in attr_synsets:
            a_synset = attr_synsets[a]

            attr_wn_edge = [
                a_id, WORDNET_SENSE_REL,
                create_uri(WORDNET_NS, a_synset), data_source, weight, {
                    'image_id': image_id
                }
            ]
            all_edges.append(attr_wn_edge)

            # save wordnet data for an attribute
            wn2label[a_synset].add(a)
            wn2image[a_synset].add(image_id)
            a_pos = a_synset.split('.')[-2]

            all_attr_synsets.append(a_synset)

        # attribute node
        a_label, a_aliases = add_lowercase_labels([a])
        attr_node = [
            a_id, a_label, ','.join(a_aliases), a_pos, data_source, {
                'image_id': image_id
            }
        ]
        all_nodes.append(attr_node)

    return all_nodes, all_edges, wn2label, wn2image, attr_id
Example #5
def add_action():
    """ Ajouter une action """
    store_file = STORE['actions']
    label = request.json['label']

    uri = create_uri(BASE_URI_ACTION + sanitize(label))

    load_rdf_file(store_file)
    Action(resUri=uri, label=label)
    save_rdf_file(store_file)

    # Return the created URI as JSON (Flask responds with HTTP 200 by default)
    return jsonify({'uri': uri})
Example #6
cn_nodes_file = f'../output_v{VERSION}/conceptnet/nodes_v{VERSION}.csv'
wn_nodes_file = f'../output_v{VERSION}/wordnet/nodes_v{VERSION}.csv'
vg_nodes_file = f'../output_v{VERSION}/visualgenome/nodes_v{VERSION}.csv'

wordnet30_ili_file = '../input/mappings/ili-map-pwn30.tab'
wordnet31_ili_file = '../input/mappings/ili-map-pwn31.tab'

# OUTPUT FILE
output_dir = f'../output_v{VERSION}/mappings'
edges_file = f'{output_dir}/wn_wn_mappings.csv'

MOWGLI_NS = config.mowgli_ns
WORDNET_NS = config.wordnet_ns

SAMEAS_REL = create_uri(MOWGLI_NS, config.sameas)

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

### Load the CN filtered data in pandas ###

# cnfile (the path to the ConceptNet assertions TSV) is not defined in this snippet
df = pd.read_csv(cnfile, sep='\t', header=None, converters={4: json.loads})
df.columns = ['assertion', 'rel', 'subj', 'obj', 'metadata']
df = df.drop(columns=['assertion'])  # drop() returns a copy, so assign it back
print('size of df with external URLs', len(df))

df_wordnet = df.loc[(df['rel'] == '/r/ExternalURL')
                    & (df['obj'].str.contains(r'http://wordnet-'))]
print('size of df with wordnet external links', len(df_wordnet))
Example #7
# NOTE: the snippet starts mid-statement; the left-hand side of this assignment
# (presumably ABS_DATA_DIR) was cut off.
    os.path.split(os.path.abspath(__file__))[0], DATA_DIR)
DICTIONARY_FILE = concat(ABS_DATA_DIR,
                         LANG,
                         concat(FILENAME, EXT, sep='.'),
                         sep='/')

if __name__ == '__main__':
    cols = {
        'word_id': Integer,
        'word': String(35),
        'category_1': String(20),
        'category_2': String(20),
        'category_3': String(20),
        'category_4': String(20),
        'category_5': String(20),
        'category_6': String(20),
        'category_7': String(20),
        'category_8': String(20),
        'category_9': String(20),
        'category_10': String(20),
        'category_11': String(20),
        'category_12': String(20)
    }

    builder = Builder(filepath=DICTIONARY_FILE, sep='\t', db_uri=create_uri())
    builder.primary_col_index = 0
    builder.build('C', cols, LANG)
    # builder.resume('C', cols, LANG, 0, 4000)
    # print(builder.read('X', row_num = 150))
    del builder
Example #8
data_source = config.mw_ds
weight = 1.0
VERSION = config.VERSION

EDGE_COLS = config.edges_cols

# INPUT FILE
mapping_file = '../input/mappings/Edges_WordNet2Wikidata_New.csv'

# OUTPUT FILE
output_dir = f'../output_v{VERSION}/mappings'
edges_file = f'{output_dir}/wn_wdt_mappings.csv'

MOWGLI_NS = config.mowgli_ns
WORDNET_NS = config.wordnet_ns

SAMEAS_REL = utils.create_uri(MOWGLI_NS, config.sameas)

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

edges_df = pd.read_csv(mapping_file,
                       sep='\t',
                       header=0,
                       converters={5: json.loads})
edges_df.sort_values(by=['subject', 'predicate', 'object']).to_csv(edges_file,
                                                                   index=False,
                                                                   sep='\t')

print(len(edges_df), 'edges stored')
Example #9
# NOTE: the snippet starts mid-loop; the code that builds edge_data and
# node_datasets is not shown here.
    all_edges.append(edge_data)

#### a. Prepare and store nodes ####

all_nodes = []
for n, datasets in node_datasets.items():
    label = cn.uri_to_label(n)
    aliases_list = []
    aliases = ','.join(aliases_list)
    mapped_pos, raw_pos = get_cn_pos_tag(n, MOWGLI_NS, POS_MAPPING)
    other = {'datasets': list(datasets)}
    col = [n, label, aliases, raw_pos, data_source, other]
    all_nodes.append(col)

for raw_pos, mapped_pos in POS_MAPPING.items():
    mowgli_pos = create_uri(MOWGLI_NS, mapped_pos)
    col = [
        mowgli_pos, raw_pos, mapped_pos, '', '', {
            "datasets": [CUSTOM_DATASET]
        }
    ]
    all_nodes.append(col)

nodes_df = pd.DataFrame(all_nodes, columns=NODE_COLS)
nodes_df.sort_values('id').to_csv(nodes_file, index=False, sep='\t')
print('unique POS tags', nodes_df['pos'].unique())
print(len(nodes_df), 'nodes')

#### b. Enrich and store edges ####

all_edges_enriched = copy.deepcopy(all_edges)
Example #10
def add_relationships_data(rels, obj2names, image_id, all_nodes, all_edges,
                           wn2label, wn2image):

    image_node = create_uri(VG_NS, 'I' + image_id)
    image_metadata = {}
    for rel in rels:
        synsets = rel['synsets']
        pred = rel['predicate']
        pred_id = create_uri(VG_NS, pred.replace(' ', '_'))
        sub_names = obj2names[str(rel['subject_id'])]
        obj_names = obj2names[str(rel['object_id'])]

        for sub_name in sub_names:
            sub_id = create_uri(VG_NS, sub_name.replace(' ', '_'))
            for obj_name in obj_names:
                obj_id = create_uri(VG_NS, obj_name.replace(' ', '_'))

                rel_edge = [
                    pred_id,
                    create_uri(VG_NS, 'subject'), sub_id, data_source, weight,
                    image_metadata
                ]
                all_edges.append(rel_edge)

                rel_edge = [
                    pred_id,
                    create_uri(VG_NS, 'object'), obj_id, data_source, weight,
                    image_metadata
                ]
                all_edges.append(rel_edge)

                rel_edge = [
                    sub_id, RELATEDTO_REL, obj_id, data_source, weight,
                    image_metadata
                ]
                all_edges.append(rel_edge)

                rel_edge = [
                    obj_id, RELATEDTO_REL, sub_id, data_source, weight,
                    image_metadata
                ]
                all_edges.append(rel_edge)

        pos = ''
        for s in synsets:
            rel_syn_col = [
                pred_id, WORDNET_SENSE_REL,
                create_uri(WORDNET_NS, s), data_source, weight, image_metadata
            ]
            all_edges.append(rel_syn_col)

            wn2label[s].add(pred)
            wn2image[s].add(image_id)

            pos = s.split('.')[-2]

            all_rel_synsets.append(s)

        # CREATE relationship node
        label, aliases = add_lowercase_labels([pred])
        rel_node = [
            pred_id, label, ','.join(aliases), pos, data_source, image_metadata
        ]
        all_nodes.append(rel_node)

        rel_img_edge = [
            pred_id, INIMAGE_REL, image_node, data_source, weight,
            image_metadata
        ]
        all_edges.append(rel_img_edge)

    return all_nodes, all_edges, wn2label, wn2image
Example #11
with open(vg_scene_path, 'r') as f:
    images_data = json.load(f)

with open(attr_synsets_path, 'r') as f:
    attr_synsets = json.load(f)

print('num images', len(images_data))

NODE_COLS = config.nodes_cols
EDGE_COLS = config.edges_cols

MOWGLI_NS = config.mowgli_ns
WORDNET_NS = config.wordnet_ns
VG_NS = config.visualgenome_ns

WORDNET_SENSE_REL = create_uri(VG_NS, config.pwordnet_sense)
SUBJECT_REL = create_uri(VG_NS, config.subject)
OBJECT_REL = create_uri(VG_NS, config.objct)
INIMAGE_REL = create_uri(VG_NS, config.in_image)

RELATEDTO_REL = config.related_to

CUSTOM_DATASET = config.custom_dataset

data_source = config.vg_ds
weight = 1.0

debug = False

### Load the data into two tables: nodes (from objects with attributes) and edges (from relationships) WITH deduplication ###
#### Process edges first ####
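The snippet ends before the loop that actually fills the two tables. A hedged sketch of how the helpers from Examples #1 and #10 might be driven per image under this setup (the keys image_id, objects and relationships follow the Visual Genome scene-graph JSON; the defaultdict accumulators are an assumption):

from collections import defaultdict

# Hedged sketch: the original driver loop is not shown in this example.
all_nodes, all_edges = [], []
wn2label, wn2image = defaultdict(set), defaultdict(set)

for image in images_data:
    image_id = str(image['image_id'])
    all_nodes, all_edges, wn2label, wn2image, obj2names = add_object_data(
        image['objects'], image_id, all_nodes, all_edges, wn2label, wn2image)
    all_nodes, all_edges, wn2label, wn2image = add_relationships_data(
        image['relationships'], obj2names, image_id, all_nodes, all_edges,
        wn2label, wn2image)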
Example #12
with open(vg_scene_path, 'r') as f:
    images_data = json.load(f)

with open(attr_synsets_path, 'r') as f:
    attr_synsets = json.load(f)

print('num images', len(images_data))

NODE_COLS = config.nodes_cols
EDGE_COLS = config.edges_cols

MOWGLI_NS = config.mowgli_ns
WORDNET_NS = config.wordnet_ns
VG_NS = config.visualgenome_ns

WORDNET_SENSE_REL = create_uri(VG_NS, config.pwordnet_sense)
SUBJECT_REL = create_uri(VG_NS, config.subject)
OBJECT_REL = create_uri(VG_NS, config.objct)
INIMAGE_REL = create_uri(VG_NS, config.in_image)

HAS_PROPERTY_REL = config.has_prop

CUSTOM_DATASET = config.custom_dataset

data_source = config.vg_ds
weight = "1.0"

### Load the data into two tables: nodes (from objects with attributes) and edges (from relationships) WITH deduplication ###
#### Process edges first ####

all_edges = []