Example #1
0
def interlinking_star_search(context, data_dict):
    '''Look up terms in lucene using a trailing '*' wildcard.

    The wildcard is positioned at the end of the search string; the lookup
    itself is delegated to ``lucene_access.search`` in ``'like'`` mode.

    :param context: action context; an optional ``'schema'`` entry overrides
        the default ``dsschema.interlinking_star_search_schema()``.
    :param data_dict: request data, validated against the schema; the
        ``'term'`` and ``'reference_resource'`` entries are read from the
        validated result.
    :returns: whatever ``lucene_access.search`` returned, or ``''`` when that
        result is an ``int`` (presumably an error code -- confirm against
        ``lucene_access``).
    :raises ValidationError: if validation reports any errors.
    '''
    validated, problems = _validate(
        data_dict,
        context.get('schema', dsschema.interlinking_star_search_schema()),
        context)
    if problems:
        raise p.toolkit.ValidationError(problems)

    matches = lucene_access.search(validated.get('term'),
                                   validated.get('reference_resource'),
                                   'like')
    # An integer result is not a usable term list; callers get an empty string.
    return '' if isinstance(matches, int) else matches
Example #2
0
def _interlink_best_match_record(rec, suggestions):
    '''Build the datastore row for ``rec`` from its highest-scoring suggestion.'''
    # The first field is the field on which the search was run
    search_field = suggestions['fields'][0]
    # max() keeps the first of equally scored suggestions, like a strict '>' scan.
    best_suggestion = max(suggestions['records'],
                          key=lambda suggestion: suggestion['scoreField'])

    nrec = {'_id': rec.get('_id'),
            search_field: best_suggestion[search_field],
            'int__score': best_suggestion['scoreField'],
            'int__checked_flag': False,
            'int__all_results': json.dumps(suggestions)}
    for field in suggestions['fields']:
        if field != search_field and field != 'scoreField':
            nrec[field] = best_suggestion[field]
    return nrec


def _interlink_empty_record(rec, real_fields):
    '''Build the blank datastore row for ``rec`` when no suggestion was found.'''
    suggestions = {"fields": real_fields,
                   "records": [],
                   }
    search_field = real_fields[0]
    nrec = {'_id': rec.get('_id'),
            search_field: "",
            'int__score': "",
            'int__checked_flag': False,
            'int__all_results': json.dumps(suggestions)}
    for field in real_fields:
        if field != search_field and field != 'scoreField':
            nrec[field] = ""
    return nrec


def _interlink_column(context, res, col_name, original_ds, new_ds, reference, ref_fields):
    '''Interlink one column of a datastore resource against a reference.

    Marks the column as interlinked on both the interlinking resource and its
    parent resource, then walks the original datastore table in pages of 100
    records (sorted by ``_id``). For every record the value of ``col_name`` is
    searched in lucene and one row is upserted into the new datastore table:
    the best-scoring suggestion when there is one, blank fields otherwise.

    :param context: action context passed to every toolkit action.
    :param res: the interlinking resource dict; ``interlinking_columns_status``
        and ``interlinking_parent_id`` are read from it.
    :param col_name: name of the column being interlinked.
    :param original_ds: dict providing ``resource_id`` and ``total`` of the
        datastore table that is read.
    :param new_ds: dict providing ``resource_id`` of the datastore table that
        is written.
    :param reference: reference resource the column is interlinked with.
    :param ref_fields: stored JSON-encoded on the resource as
        ``reference_fields``.
    :returns: ``new_ds`` on success, or ``-1`` as soon as
        ``lucene_access.search`` returns an ``int`` or
        ``lucene_access.getFields`` returns something other than a list.
        Note that the resources have already been updated (status
        ``'undergoing'``) when ``-1`` is returned.
    '''
    get_action = p.toolkit.get_action

    res_id = original_ds.get('resource_id')
    total = original_ds.get('total')

    # The interlinked column is marked with the reference resource with which it is interlinked.
    columns = json.loads(res.get('interlinking_columns_status', '{}'))
    if col_name in columns:
        columns[col_name] = reference
    columns = json.dumps(columns)

    original_res = get_action('resource_show')(context, {'id': res.get('interlinking_parent_id')})
    original_res['interlinked_column'] = col_name
    get_action('resource_update')(context, original_res)

    res = get_action('resource_show')(context, res)
    res['interlinking_resource'] = True
    res['interlinking_columns_status'] = columns
    res['interlinking_status'] = 'undergoing'
    res['reference_fields'] = json.dumps(ref_fields)
    res = get_action('resource_update')(context, res)

    STEP = 100
    # Field list of the reference; fetched lazily the first time a record has
    # no suggestions, then reused for the rest of the run.
    real_fields = None

    for page in range(int(ceil(total / float(STEP)))):
        recs = get_action('datastore_search')(context, {
            'resource_id': res_id,
            'offset': page * STEP,
            'limit': STEP,
            'sort': '_id'}).get('records')

        nrecs = []
        for rec in recs:
            original_term = rec.get(col_name)
            suggestions = lucene_access.search(original_term, reference, 'search')
            if isinstance(suggestions, int):
                return -1

            # If any suggestions were returned
            if len(suggestions['records']) > 0:
                nrecs.append(_interlink_best_match_record(rec, suggestions))
                continue

            # No suggestions were returned
            if real_fields is None:
                fetched_fields = lucene_access.getFields(reference, False)
                if not isinstance(fetched_fields, list):
                    return -1
                real_fields = fetched_fields
            nrecs.append(_interlink_empty_record(rec, real_fields))

        get_action('datastore_upsert')(context, {
            'resource_id': new_ds.get('resource_id'),
            'allow_update_with_id': True,
            'force': True,
            'records': nrecs,
        })

    return new_ds