コード例 #1
0
ファイル: __init__.py プロジェクト: francescoinfante/identity
def _merge_records(args):
    data = args[0]
    config = args[1]
    if len(args) > 2:
        all_data = args[2]
    else:
        all_data = data

    if len(data) == 1:
        return data[0]

    result = {}

    for k, v in config.iteritems():
        if isinstance(v, dict):
            result[k] = _merge_records((extract_from_tuple(data, k), v, all_data))
        elif isinstance(v, tuple):
            for x in v:
                if isclass(x):
                    x = x()
                result[k] = x.resolve(extract_from_tuple(data, k), all_data)
                if result[k] is not None:
                    break
        else:
            if isclass(v):
                v = v()
            result[k] = v.resolve(extract_from_tuple(data, k), all_data)

    return result
コード例 #2
0
 def resolve(self, conflict_data, all_data):
     """Return the conflicting value paired with the greatest date.

     Dates are obtained from ``extract_from_tuple(all_data,
     self._date_field)`` and matched to ``conflict_data`` by position.
     A later candidate replaces the current choice only when its date is
     strictly greater and its value is not ``None``.

     NOTE(review): the first entry seeds the result unconditionally, so a
     ``None`` first value is returned when no other entry has a strictly
     greater date -- confirm this is intended.
     """
     dates = extract_from_tuple(all_data, self._date_field)
     chosen, newest = conflict_data[0], dates[0]
     for idx, stamp in enumerate(dates):
         # Date is compared first; the value is only inspected for entries
         # that are actually newer than the current choice.
         if stamp > newest and conflict_data[idx] is not None:
             chosen, newest = conflict_data[idx], stamp
     return chosen
コード例 #3
0
ファイル: __init__.py プロジェクト: francescoinfante/identity
 def next(self):
     """Return the feature vector for the next pair from ``self.pairs``.

     For every ``(feat, path)`` entry in ``self.features`` the two elements
     of ``extract_from_tuple(pair, path)`` are passed to ``feat.extract``.
     A ``feat`` given as a class is instantiated with no arguments first.

     The returned dict is keyed by ``'<FeatureClassName>@<path>'``. When
     ``extract`` returns a dict, each of its items is stored under
     ``'<FeatureClassName>@<path>:<name>'`` instead.

     Nothing here catches exceptions, so whatever ``self.pairs.next()``
     raises propagates to the caller.
     """
     # Kept as an explicit .next() call: the pair source is expected to
     # expose that method, exactly as this class does.
     pair = self.pairs.next()
     features_vector = {}
     for feat, path in self.features:
         if isclass(feat):
             feat = feat()
         operands = extract_from_tuple(pair, path)
         outcome = feat.extract(operands[0], operands[1])
         owner = feat.__class__.__name__
         if isinstance(outcome, dict):
             # .items() instead of the Python 2-only .iteritems(); both
             # yield the same pairs, and .items() also exists on Python 3.
             for name, value in outcome.items():
                 features_vector[owner + '@' + path + ':' + str(name)] = value
         else:
             features_vector[owner + '@' + path] = outcome
     return features_vector
コード例 #4
0
ファイル: __init__.py プロジェクト: francescoinfante/identity
    def __init__(self, blocking_algorithm, unique_attribute, debug=False):
        """Collect the items of every block, dropping repeated ones.

        ``blocking_algorithm`` is iterated as an iterable of iterables.
        Each inner item is identified by the sorted tuple of
        ``extract_from_tuple(item, unique_attribute)``, so the same values
        in a different order count as the same item. The first item seen
        for each identity is appended to ``self._pairs``; later ones are
        skipped.

        When ``debug`` is true, progress is logged at start, after every
        10000 items examined, and at the end.
        """
        self._pairs = []
        seen_keys = set()
        examined = 0

        if debug:
            logger.info('joinblock start')

        for block in blocking_algorithm:
            for candidate in block:
                examined += 1
                if debug and examined % 10000 == 0:
                    logger.info('tick ' + str(examined))

                identity = tuple(
                    sorted(extract_from_tuple(candidate, unique_attribute)))
                if identity in seen_keys:
                    continue

                seen_keys.add(identity)
                self._pairs.append(candidate)

        if debug:
            logger.info('joinblock done')
コード例 #5
0
 def resolve(self, conflict_data, all_data):
     """Return the conflicting value whose source equals ``self._source_value``.

     Sources are obtained from ``extract_from_tuple(all_data,
     self._source_field)`` and matched to ``conflict_data`` by position.
     The first matching position wins; ``None`` is returned when no source
     matches.
     """
     sources = extract_from_tuple(all_data, self._source_field)
     for idx, origin in enumerate(sources):
         if origin == self._source_value:
             return conflict_data[idx]
     return None