Esempio n. 1
0
def _initialize_columns(context, col_name, ds, total, reference_resource):
    """Make sure the datastore table has every column interlinking needs.

    The required columns are the reference resource's fields plus the
    internal ``int__score``, ``int__checked_flag`` and ``int__all_results``
    columns. If any of them is missing from ``ds['fields']`` the datastore
    table is dropped and recreated with exactly the required columns.

    :param context: CKAN action context forwarded to the datastore actions.
    :param col_name: not used by this function; kept for call compatibility.
    :param ds: datastore description dict; ``fields`` and ``resource_id``
        are read from it.
    :param total: not used by this function; kept for call compatibility.
    :param reference_resource: reference dataset identifier handed to
        ``lucene_access.getFields``.
    :returns: the list of field definitions when the table was recreated,
        ``None`` when all columns already existed, or the raw (non-list)
        value returned by ``lucene_access.getFields`` when the lookup failed.
    """
    # Get reference dataset's fields that should be stored in datastore
    reference_field_names = lucene_access.getFields(reference_resource, True)
    if not isinstance(reference_field_names, list):
        # It carries -1 value as an error code
        return reference_field_names

    # Fields as they are supposed to be stored in the datastore: the first
    # reference field, then the internal columns, then the remaining
    # reference fields.
    key_field = reference_field_names[0]
    final_fields = [
        {'id': key_field, 'type': 'text'},
        {'id': u"int__score", 'type': 'text'},
        {'id': u"int__checked_flag", 'type': 'boolean'},
        {'id': u"int__all_results", 'type': 'text'},
    ]
    final_fields.extend({'id': name, 'type': 'text'}
                        for name in reference_field_names
                        if name != key_field)

    # A datastore without a field list is treated as having no columns,
    # which forces the recreation below instead of raising a TypeError.
    existing_ids = set(field['id'] for field in (ds.get('fields') or []))
    if all(field['id'] in existing_ids for field in final_fields):
        return None

    resource_id = ds['resource_id']

    # Drop and recreate datastore table
    p.toolkit.get_action('datastore_delete')(context, {
        'resource_id': resource_id,
        'force': True,
    })
    # Update fields with datastore_create
    p.toolkit.get_action('datastore_create')(context, {
        'resource_id': resource_id,
        'force': True,
        'allow_update_with_id': True,
        'fields': final_fields,
    })
    return final_fields
Esempio n. 2
0
def _build_interlinked_record(rec_id, suggestions, search_field, best=None):
    """Build the datastore row holding the interlinking result of one record.

    :param rec_id: ``_id`` of the original record the row belongs to.
    :param suggestions: dict with ``fields`` and ``records`` keys; it is
        serialised as JSON into ``int__all_results``.
    :param search_field: name of the reference field the search was run on.
    :param best: the chosen suggestion, or ``None`` when nothing matched, in
        which case every reference column is filled with an empty string.
    :returns: a dict suitable for the ``records`` list of ``datastore_upsert``.
    """
    matched = best is not None
    row = {'_id': rec_id,
           search_field: best[search_field] if matched else "",
           'int__score': best['scoreField'] if matched else "",
           'int__checked_flag': False,
           'int__all_results': json.dumps(suggestions)}
    for field in suggestions['fields']:
        # The search field is already set above and the score is stored
        # under int__score, so neither is copied again.
        if field != search_field and field != 'scoreField':
            row[field] = best[field] if matched else ""
    return row


def _interlink_column(context, res, col_name, original_ds, new_ds, reference, ref_fields):
    """Interlink one column of a resource against a reference dataset.

    Marks ``col_name`` as interlinked on both the parent resource and the
    interlinking resource, then walks the original datastore in pages of
    100 records, looks up each value of ``col_name`` with
    ``lucene_access.search`` and upserts the best-scoring suggestion (or an
    empty row when there is none) into the ``new_ds`` datastore.

    :param context: CKAN action context forwarded to every action call.
    :param res: interlinking resource dict; ``interlinking_columns_status``
        and ``interlinking_parent_id`` are read from it.
    :param col_name: name of the column being interlinked.
    :param original_ds: datastore description of the original resource;
        ``resource_id`` and ``total`` are read from it.
    :param new_ds: datastore description of the interlinking resource;
        its ``resource_id`` receives the upserts.
    :param reference: reference dataset identifier handed to lucene_access.
    :param ref_fields: reference fields, stored as JSON on the resource.
    :returns: ``new_ds`` on success, or ``-1`` as soon as
        ``lucene_access.search`` returns an int or
        ``lucene_access.getFields`` returns a non-list. Rows upserted for
        earlier pages are not rolled back in that case.
    """
    get_action = p.toolkit.get_action
    res_id = original_ds.get('resource_id')
    total = original_ds.get('total')

    # The interlinked column is marked with the reference resource with
    # which it is interlinked.
    columns = json.loads(res.get('interlinking_columns_status', '{}'))
    if col_name in columns:
        columns[col_name] = reference
    columns = json.dumps(columns)

    original_res = get_action('resource_show')(
        context, {'id': res.get('interlinking_parent_id')})
    original_res['interlinked_column'] = col_name
    get_action('resource_update')(context, original_res)

    res = get_action('resource_show')(context, res)
    res['interlinking_resource'] = True
    res['interlinking_columns_status'] = columns
    res['interlinking_status'] = 'undergoing'
    res['reference_fields'] = json.dumps(ref_fields)
    get_action('resource_update')(context, res)

    STEP = 100
    # Placeholder "no results" payload. It is built lazily and reused, so
    # the reference schema is looked up at most once per call instead of
    # once per unmatched record.
    empty_suggestions = None

    for page in range(int(ceil(total / float(STEP)))):
        recs = get_action('datastore_search')(context, {
            'resource_id': res_id,
            'offset': page * STEP,
            'limit': STEP,
            'sort': '_id'}).get('records')

        nrecs = []
        for rec in recs:
            suggestions = lucene_access.search(rec.get(col_name), reference, 'search')
            # An int result is treated as a failure of the search backend
            if isinstance(suggestions, int):
                return -1

            candidates = suggestions['records']
            if candidates:
                # The first field is the field on which the search was run
                search_field = suggestions['fields'][0]
                # max() keeps the first of equally scored suggestions
                best = max(candidates, key=lambda s: s['scoreField'])
                nrecs.append(_build_interlinked_record(
                    rec.get('_id'), suggestions, search_field, best))
            else:
                # No suggestions were returned
                if empty_suggestions is None:
                    real_fields = lucene_access.getFields(reference, False)
                    if not isinstance(real_fields, list):
                        return -1
                    empty_suggestions = {"fields": real_fields, "records": []}
                nrecs.append(_build_interlinked_record(
                    rec.get('_id'), empty_suggestions,
                    empty_suggestions['fields'][0]))

        get_action('datastore_upsert')(context, {
            'resource_id': new_ds.get('resource_id'),
            'allow_update_with_id': True,
            'force': True,
            'records': nrecs,
        })

    return new_ds