# Example #1
def find_mentions(entities):
    """
    Find unique entities and their mentions.

    Args:
        entities: iterable of entity structs, each with a `kb_id` sequence
            of grounded knowledge-base ids ('-1' first marks ungrounded).

    Returns:
        OrderedDict mapping a tuple of equivalent kb_ids (or a singleton
        ('UNK:<n>',) key for ungrounded entities) to the list of entity
        mentions belonging to that cluster.
    """
    # Distinct kb_id groups, first-seen order preserved.
    seen_ids = []
    for ent in entities:
        if ent.kb_id not in seen_ids:
            seen_ids.append(ent.kb_id)

    # Cluster kb_id groups that are linked in the id graph.
    components = connected_components(to_graph(seen_ids))

    unique_entities = OrderedDict()
    for comp in components:
        key = tuple(comp)
        # Components whose first id is '-1' are ungrounded; handled below.
        if key[0] != '-1':
            unique_entities[key] = []

    # Ungrounded entities become their own UNK:<n> singleton clusters;
    # grounded ones join every cluster overlapping their kb_id set.
    unk_id = 0
    for ent in entities:
        if ent.kb_id[0] == '-1':
            unique_entities[('UNK:' + str(unk_id),)] = [ent]
            unk_id += 1
        else:
            ids = set(ent.kb_id)
            for key in unique_entities:
                if ids & set(key):
                    unique_entities[key].append(ent)

    return unique_entities
# Example #2
def preprocess(raw_data, dataset):
    """
    Convert raw SMILES molecules to graph records and dump one JSON file
    per split.

    Args:
        raw_data: dict with 'train'/'valid'/'test' lists of molecule
            dicts, each carrying at least 'smiles' and 'prop'.
        dataset: dataset name, forwarded to to_graph and used in the
            output filenames.

    Side effects:
        Writes molecules_<section>_<dataset>.json for each split.
    """
    # NOTE(review): the original computed mean/std of the global
    # `all_values` here (with a pointless tolist()->np.array roundtrip)
    # but never used either value — presumably a leftover from a dropped
    # normalization step; removed as dead code.
    print('parsing smiles as graphs...')
    processed_data = {'train': [], 'valid': [], 'test': []}

    file_count = 0
    for section in ['train', 'valid', 'test']:
        # Iterate molecules directly instead of materializing a list.
        for mol in raw_data[section]:
            nodes, edges = to_graph(mol['smiles'], dataset)
            # Skip molecules that yielded no graph edges.
            if len(edges) <= 0:
                continue
            processed_data[section].append({
                'targets': np.array(mol['prop']).tolist(),
                'graph': edges,
                'node_features': nodes,
            })
            if file_count % 2000 == 0:
                print('finished processing: %d' % file_count, end='\r')
            file_count += 1
        print('%s: 100 %%      ' % (section))
        with open('molecules_%s_%s.json' % (section, dataset), 'w') as f:
            json.dump(processed_data[section], f)
# Example #3
def preprocess(raw_data, dataset):
    """
    Convert raw SMILES molecules to graph records, dump one JSON file per
    split, and additionally persist the training-set SMILES strings.

    Args:
        raw_data: dict with 'train'/'valid' lists of molecule dicts, each
            carrying at least 'smiles' and 'QED'.
        dataset: dataset name, forwarded to to_graph and used in the
            output filenames.

    Side effects:
        Writes molecules_<section>_<dataset>.json per split and
        smiles_<dataset>.pkl for the training split.
    """
    print('parsing smiles as graphs...')
    processed_data = {'train': [], 'valid': []}

    file_count = 0
    for section in ['train', 'valid']:
        all_smiles = []  # record all smiles in training dataset
        # Iterate molecules directly instead of materializing a list.
        for mol in raw_data[section]:
            smiles, qed = mol['smiles'], mol['QED']
            nodes, edges = to_graph(smiles, dataset)
            # Skip molecules that yielded no graph edges.
            if len(edges) <= 0:
                continue
            processed_data[section].append({
                'targets': [[qed]],
                'graph': edges,
                'node_features': nodes,
                'smiles': smiles
            })
            all_smiles.append(smiles)
            if file_count % 2000 == 0:
                print('finished processing: %d' % file_count, end='\r')
            file_count += 1
        print('%s: 100 %%      ' % (section))
        # save the dataset
        with open('molecules_%s_%s.json' % (section, dataset), 'w') as f:
            json.dump(processed_data[section], f)
        # save all molecules in the training dataset
        if section == 'train':
            utils.dump('smiles_%s.pkl' % dataset, all_smiles)
# Example #4
def preprocess(raw_data, dataset):
    """
    Convert raw SMILES molecules to graph records (with histogram data)
    and dump one JSON file per split, printing per-split counts at the end.

    Args:
        raw_data: dict with 'train'/'valid'/'test' lists of molecule
            dicts, each carrying 'smiles', 'QED' and 'hist'.
        dataset: dataset name, forwarded to utils.to_graph and used in
            the output filenames.

    Side effects:
        Writes molecules_<section>_<dataset>.json for each split.
    """
    print('Parsing smiles as graphs...')
    processed_data = {'train': [], 'valid': [], 'test': []}

    file_count = 0
    for section in ['train', 'valid', 'test']:
        # NOTE(review): collected per split but never saved here (unlike
        # the sibling preprocess that dumps a .pkl) — confirm intent.
        all_smiles = []
        # Iterate molecules directly instead of materializing a list.
        for mol in raw_data[section]:
            smiles, qed, hist = mol['smiles'], mol['QED'], mol['hist']
            nodes, edges = utils.to_graph(smiles, dataset)
            if len(edges) <= 0:
                print('Error. Molecule with len(edges) <= 0')
                continue
            processed_data[section].append({
                'targets': [[qed]],
                'graph': edges,
                'node_features': nodes,
                'smiles': smiles,
                'hist': hist
            })
            all_smiles.append(smiles)
            if file_count % 1000 == 0:
                print('Finished processing: %d' % file_count, end='\r')
            file_count += 1
        print('%s: 100 %%                   ' % (section))
        with open('molecules_%s_%s.json' % (section, dataset), 'w') as f:
            json.dump(processed_data[section], f)

    print("Train molecules = " + str(len(processed_data['train'])))
    print("Valid molecules = " + str(len(processed_data['valid'])))
    print("Test molecules = " + str(len(processed_data['test'])))
def build_model(image, param1, param2, cycle4, cycle8, facet):
    """
    Build the CPLEX ILP model for piecewise-linear fitting.

    Args:
        image: 2-D image array the model is built over.
        param1: constraint parameter forwarded to get_constraints.
        param2: objective parameter forwarded to get_obj.
        cycle4: flag forwarded to get_constraints (4-cycle inequalities).
        cycle8: flag forwarded to get_constraints (8-cycle inequalities).
        facet: data attached to the multicut callback.

    Returns:
        A configured cplex.Cplex minimization model with the multicut
        callback registered.
    """
    print("Building Cplex model...")
    # initialize model
    model = cplex.Cplex()
    # set sense
    model.objective.set_sense(model.objective.sense.minimize)
    # discrete second derivative drives the objective coefficients
    derivative = utils.get_derivative(image)

    # pixel-adjacency graph, copied onto the callback below
    graph = utils.to_graph(image)

    # build objective function: base variables plus weighted columns
    # (renamed from `vars`, which shadowed the builtin)
    base_vars = get_varibles(image)  # [sic] external helper name
    model.variables.add(names=base_vars)
    colnames, obj, types = get_obj(derivative, param2)
    model.variables.add(obj=obj, types=types, names=colnames)

    # add constraints
    rows, senses, rhs = get_constraints(image,
                                        derivative,
                                        param1,
                                        cycle4=cycle4,
                                        cycle8=cycle8)
    model.linear_constraints.add(lin_expr=rows, senses=senses, rhs=rhs)

    # opportunistic parallel mode, capped at 32 threads
    model.parameters.parallel.set(-1)
    model.parameters.threads.set(32)

    # register callback
    #model.register_callback(cutremoveCallback)
    model.register_callback(multicutCallback)
    # associate additional data the callback needs
    multicutCallback._graph = graph.copy()
    multicutCallback._names = model.variables.get_names()
    multicutCallback._facet = facet
    #cutremoveCallback._names = model.variables.get_names()

    return model
# Offsets of the three neighbours completing each candidate 2x2 patch
# around (i, j), tried in the original order:
# (right+down), (left+down), (right+up), (left+up).
_PATCH_OFFSETS = (
    ((1, 0), (0, 1), (1, 1)),
    ((1, 0), (0, -1), (1, -1)),
    ((-1, 0), (0, 1), (-1, 1)),
    ((-1, 0), (0, -1), (-1, -1)),
)


def _best_patch_fit(image, i, j):
    """Fit a plane to every in-bounds 2x2 patch containing (i, j) and
    return the affine parameters of the lowest-MSE fit, or None if no
    patch fits (image smaller than 2x2 in some dimension)."""
    h, w = image.shape[0], image.shape[1]
    best_params, best_mse = None, float("inf")
    for offsets in _PATCH_OFFSETS:
        coords = [(i, j)] + [(i + di, j + dj) for di, dj in offsets]
        # avoid out of bound
        if not all(0 <= r < h and 0 <= c < w for r, c in coords):
            continue
        X = [[r, c] for r, c in coords]
        y = [image[r, c] for r, c in coords]
        # linear regression; strict '<' keeps the first minimum on ties,
        # matching the original quadrant order
        params, mse = fit(X, y)
        if mse < best_mse:
            best_params, best_mse = params, mse
    return best_params


def affine_regression(image):
    """
    Perform a parametric affine fitting.

    For every pixel, a plane is fitted (via `fit`) to each of the four
    2x2 neighbourhoods containing it; the lowest-MSE parameters are
    stored on the pixel's graph node along with a unit weight and the
    pixel's coordinates/depth.

    Args:
        image: 2-D array of values; must be at least 2x2.

    Returns:
        The graph from utils.to_graph(image) with per-node attributes
        "weight", "pixels" and "affine_params".

    Raises:
        ValueError: if the image is too small for any 2x2 patch.
    """
    # build graph
    graph = utils.to_graph(image)

    affine_params = np.zeros((*image.shape, 3))
    print("Fitting affine parameters...")
    for (i, j) in graph.nodes():
        params = _best_patch_fit(image, i, j)
        if params is None:
            # The original either crashed (NameError) or silently reused
            # the previous node's parameters here; fail loudly instead.
            raise ValueError("image too small for a 2x2 affine fit")
        # record best affine parameters
        affine_params[i, j] = params

    # set attribute
    for (i, j) in graph.nodes():
        # number of nodes as weight
        graph.nodes[(i, j)]["weight"] = 1
        # coordinates and depth
        graph.nodes[(i, j)]["pixels"] = np.array([[i, j, image[i, j]]])
        # affine parameters
        graph.nodes[(i, j)]["affine_params"] = affine_params[i, j]

    return graph