예제 #1
0
            def match_one_set1_individual_pool(idx, sorted_idx, pool_size):
                """Match one set1 individual within a random pool of candidates.

                At most ``pool_size`` candidates are drawn at random from the
                remaining ones in ``local_ctx``; the candidate with the
                highest score is paired with the set1 individual and both ids
                are recorded in ``result``.

                idx -- not used by this function.
                sorted_idx -- index into the ``set1`` columns of the
                    individual to match.
                pool_size -- maximum number of candidates to score; ``None``
                    means every remaining candidate is scored.

                Raises StopIteration when no candidate is left.
                """
                global local_ctx

                set2_size = context_length(local_ctx)
                if not set2_size:
                    raise StopIteration

                # pool_size=None means "no limit", like an oversized pool.
                if pool_size is not None and set2_size > pool_size:
                    pool = random.sample(xrange(set2_size), pool_size)
                else:
                    pool = range(set2_size)

                sub_local_ctx = context_subset(local_ctx, pool, None)
                # expose the set1 individual's own values to the expression
                sub_local_ctx.update((k, set1[k][sorted_idx])
                                     for k in ['id'] + used_variables1)

                set2_scores = expr_eval(score_expr, sub_local_ctx)

                # argmax is relative to the pool: translate it back to an
                # index into the full local_ctx
                individual2_pool_idx = np.argmax(set2_scores)
                individual2_idx = pool[individual2_pool_idx]

                id1 = sub_local_ctx['id']
                id2 = local_ctx['__other_id'][individual2_idx]

                # presumably drops the matched candidate so that it cannot be
                # picked again -- confirm against context_delete
                local_ctx = context_delete(local_ctx, individual2_idx)

                # record the match in both directions
                result[id_to_rownum[id1]] = id2
                result[id_to_rownum[id2]] = id1
예제 #2
0
         def match_one_set1_individual(idx, sorted_idx):
             """Pair one set1 individual with its best scoring candidate.

             The values of the set1 individual at ``sorted_idx`` are copied
             into ``local_ctx``, ``score_expr`` is evaluated on it and the
             candidate with the highest score is selected. Both ids are then
             written to ``result``, each pointing at the other.

             idx -- not used by this function.
             sorted_idx -- index into the ``set1`` columns of the individual
                 to match.

             Raises StopIteration when ``local_ctx`` has no candidate left.
             """
             global local_ctx

             if not context_length(local_ctx):
                 raise StopIteration

             set1_columns = ['id'] + used_variables1
             local_ctx.update((name, set1[name][sorted_idx])
                              for name in set1_columns)

             # Disabled experiment: cache one simplified score expression per
             # tuple of pk_names values.
             # pk = tuple(individual1[fname] for fname in pk_names)
             # optimized_expr = optimized_exprs.get(pk)
             # if optimized_expr is None:
             #     for name in pk_names:
             #         fake_set1['__f_%s' % name].value = individual1[name]
             #     optimized_expr = str(symbolic_expr.simplify())
             #     optimized_exprs[pk] = optimized_expr
             # set2_scores = evaluate(optimized_expr, mm_dict, set2)

             scores = expr_eval(score_expr, local_ctx)
             best_idx = np.argmax(scores)

             # both ids must be read before the candidate is deleted
             set1_id = local_ctx['id']
             set2_id = local_ctx['__other_id'][best_idx]

             local_ctx = context_delete(local_ctx, best_idx)

             # the match is symmetric: each side points at the other
             for own_id, partner_id in ((set1_id, set2_id),
                                        (set2_id, set1_id)):
                 result[id_to_rownum[own_id]] = partner_id
예제 #3
0
파일: matching.py 프로젝트: abozio/Myliam2
         def match_one_set1_individual(idx, sorted_idx):
             """Match the set1 individual at ``sorted_idx`` with one candidate.

             Copies the ``used_variables1`` values of that individual into
             ``local_ctx``, scores the candidates with ``score_expr`` and
             records the highest scoring one as its partner in ``result``
             (and vice versa).

             idx -- not used by this function.
             sorted_idx -- index into the ``set1`` columns.

             Raises StopIteration when ``local_ctx`` is empty.
             """
             global local_ctx

             remaining = context_length(local_ctx)
             if not remaining:
                 raise StopIteration

             local_ctx.update((var, set1[var][sorted_idx])
                              for var in used_variables1)

             candidate_scores = expr_eval(score_expr, local_ctx)
             # debugging aid: print candidate_scores
             winner = np.argmax(candidate_scores)

             # read both ids before the winner is removed from the context
             individual_id = local_ctx['id']
             partner_id = local_ctx['__other_id'][winner]
             local_ctx = context_delete(local_ctx, winner)

             individual_row = id_to_rownum[individual_id]
             result[individual_row] = partner_id
             partner_row = id_to_rownum[partner_id]
             result[partner_row] = individual_id
예제 #4
0
        def match_cell(idx, sorted_idx, pool_size):
            """Match every individual of one set1 cell with set2 individuals.

            The set1 cell at ``sorted_idx`` is scored against the remaining
            set2 cells in ``matching_ctx`` (or a random pool of at most
            ``pool_size`` of them). As many ids as possible are paired with
            the best scoring set2 cell, and the process repeats with the
            leftover set1 ids until the set1 cell is exhausted.

            idx -- not used by this function.
            sorted_idx -- index into the ``set1`` columns of the cell to
                match.
            pool_size -- maximum number of set2 cells to score at each step;
                ``None`` means all remaining cells are scored.

            Raises StopIteration when no set2 cell is left.
            """
            global matching_ctx

            # Iterate instead of recursing: a large set1 cell matched against
            # many small set2 cells needs one step per set2 cell, which could
            # exceed the interpreter's recursion limit.
            while True:
                set2_size = context_length(matching_ctx)
                if not set2_size:
                    raise StopIteration

                use_pool = pool_size is not None and set2_size > pool_size
                if use_pool:
                    pool = random.sample(xrange(set2_size), pool_size)
                    local_ctx = context_subset(matching_ctx, pool)
                else:
                    local_ctx = matching_ctx.copy()

                local_ctx.update((k, set1[k][sorted_idx])
                                 for k in {'__ids__'} | used_variables1)

                eval_ctx = context.clone(entity_data=local_ctx)
                set2_scores = expr_eval(score, eval_ctx)
                cell2_idx = set2_scores.argmax()

                cell1ids = local_ctx['__ids__']
                cell2ids = local_ctx['__other___ids__'][cell2_idx]

                if use_pool:
                    # transform pool-local index to set/matching_ctx index
                    cell2_idx = pool[cell2_idx]

                cell1size = len(cell1ids)
                cell2size = len(cell2ids)
                nb_match = min(cell1size, cell2size)

                # we could introduce a random choice here but it is not
                # much necessary. In that case, it should be done in
                # group_context
                ids1 = cell1ids[:nb_match]
                ids2 = cell2ids[:nb_match]

                # record the matches in both directions
                result[id_to_rownum[ids1]] = ids2
                result[id_to_rownum[ids2]] = ids1

                if nb_match == cell2size:
                    # the set2 cell is fully consumed
                    matching_ctx = context_delete(matching_ctx, cell2_idx)
                else:
                    # other variables do not need to be modified since the
                    # cell only got smaller and was not deleted
                    matching_ctx['__other___ids__'][cell2_idx] = \
                        cell2ids[nb_match:]

                # FIXME: the expr gets cached for the full matching_ctx at
                # the beginning and then when another woman with the same
                # values is found, it thinks it can reuse the expr but it
                # breaks because it has not the correct length.

                # the current workaround is to invalidate the whole cache
                # for the current entity but this is not the right way to go.
                # * disable the cache for matching?
                # * use a local cache so that methods after matching() can
                # use what was in the cache before matching(). Shouldn't the
                # cache be stored inside the context anyway?
                expr_cache.invalidate(context.period, context.entity_name)

                if not nb_match < cell1size:
                    # the whole set1 cell has been matched
                    return

                # keep only the unmatched set1 ids and match them against
                # the remaining set2 cells
                set1['__ids__'][sorted_idx] = cell1ids[nb_match:]