Ejemplo n.º 1
0
 def model_fields_lists(self):
     """Post, for every model, the ordered list of its public field names.

     The lists keep the order of each model's FIELDS so that documents
     may later be emitted as OrderedDicts with fields in that order.

     For each entry of MODEL_REPO_MODELS the model's fields module is
     loaded and one JSON document {'model': ..., 'fields': [...]} is
     posted with the model name as document id.  A field is listed only
     if its ['elasticsearch']['public'] value is truthy and its name is
     not 'id', 'title' or 'description'.

     Endpoints noted by the original author:
     HOSTS:PORT/INDEX/modelfields/collection/
     HOSTS:PORT/INDEX/modelfields/entity/
     HOSTS:PORT/INDEX/modelfields/segment/
     HOSTS:PORT/INDEX/modelfields/file/
     NOTE(review): these paths say 'modelfields' but the doc type used
     below is 'esobjectfields' -- confirm which one is current.

     See also:
     identifier.MODEL_REPO_MODELS
     Identifier.fields_module
     """
     doc_type = 'esobjectfields'
     # Field names that are never included in the per-model lists.
     skipped = ('id', 'title', 'description')
     for model_name, model_info in MODEL_REPO_MODELS.items():
         fields_module = module_for_name(model_info['module'])
         public_names = []
         for field in fields_module.FIELDS:
             # Check the 'public' flag first, then the exclusion list.
             if not field['elasticsearch']['public']:
                 continue
             if field['name'] in skipped:
                 continue
             public_names.append(field['name'])
         payload = json.dumps({
             'model': model_name,
             'fields': public_names,
         })
         self.post_json(
             doc_type=doc_type,
             document_id=model_name,
             json_text=payload,
         )
Ejemplo n.º 2
0
 def model_fields_lists(self):
     """Publish the ordered public field names of each model.

     Field order follows each model's FIELDS definition so documents may
     be emitted as OrderedDicts with fields in order.

     One JSON document of the form {'model': ..., 'fields': [...]} is
     posted per key of MODEL_REPO_MODELS, using the model name as the
     document id.  Fields whose ['elasticsearch']['public'] value is
     falsy, and the fields 'id', 'title' and 'description', are omitted.

     Endpoints noted by the original author:
     HOSTS:PORT/INDEX/modelfields/collection/
     HOSTS:PORT/INDEX/modelfields/entity/
     HOSTS:PORT/INDEX/modelfields/segment/
     HOSTS:PORT/INDEX/modelfields/file/
     NOTE(review): the paths mention 'modelfields' while the doc type
     posted here is 'esobjectfields' -- confirm.

     See also:
     identifier.MODEL_REPO_MODELS
     Identifier.fields_module
     """
     doc_type = 'esobjectfields'
     # Names left out of every list regardless of their 'public' flag.
     omitted = ('id', 'title', 'description')
     for name in MODEL_REPO_MODELS:
         definitions = module_for_name(MODEL_REPO_MODELS[name]['module']).FIELDS
         listed = [
             spec['name']
             for spec in definitions
             if spec['elasticsearch']['public'] and spec['name'] not in omitted
         ]
         self.post_json(
             doc_type=doc_type,
             document_id=name,
             json_text=json.dumps({'model': name, 'fields': listed}),
         )
Ejemplo n.º 3
0
 def export(json_paths, model, csv_path, required_only=False):
     """Dump the given objects to a CSV file, one row per object.

     IMPORTANT: every object in json_paths must have the same set of
     fields, because a single header row is shared by all rows.

     Paths are sorted before writing, progress is logged per object, and
     objects for which from_identifier() returns a falsy value are
     skipped without writing a row.

     TODO let user specify which fields to write
     TODO confirm that each identifier's class matches object_class

     @param json_paths: list of .json files
     @param model: str
     @param csv_path: Absolute path to CSV data file.
     @param required_only: boolean Only required fields.
     @returns: csv_path, unchanged.
     """
     class_info = identifier.MODEL_CLASSES[model]
     object_class = identifier.class_for_name(
         class_info['module'], class_info['class']
     )
     fields_module = modules.Module(
         identifier.module_for_name(
             identifier.MODEL_REPO_MODELS[model]['module']
         )
     )

     # Classes with an 'xmp' attribute but no 'mets' attribute are treated
     # as File (or subclass); anything else as Entity (or subclass).
     is_file_class = (
         hasattr(object_class, 'xmp') and not hasattr(object_class, 'mets')
     )
     if is_file_class:
         ordered_paths = models.sort_file_paths(json_paths)
     else:
         ordered_paths = util.natural_sort(json_paths)
     total = len(ordered_paths)

     Exporter._make_tmpdir(os.path.dirname(csv_path))

     columns = fields_module.csv_export_fields(required_only)
     # Always export an 'id' column, even when the model's FIELDS do not
     # include one (the original author notes this happens for files).
     if 'id' not in columns:
         columns.insert(0, 'id')

     with codecs.open(csv_path, 'wb', 'utf-8') as csvfile:
         writer = fileio.csv_writer(csvfile)
         # The first line of the file holds the column headers.
         writer.writerow(columns)
         for position, json_path in enumerate(ordered_paths, start=1):
             oid = identifier.Identifier(json_path)
             logging.info('%s/%s - %s' % (position, total, oid.id))
             obj = object_class.from_identifier(oid)
             if not obj:
                 continue
             writer.writerow(obj.dump_csv(headers=columns))

     return csv_path
Ejemplo n.º 4
0
 def _get_module(model):
     """Return a modules.Module wrapping the fields module for `model`.

     @param model: Key into identifier.MODEL_REPO_MODELS.
     @returns: modules.Module built from the module that
         identifier.module_for_name() resolves for that model.
     """
     module_name = identifier.MODEL_REPO_MODELS[model]['module']
     fields_module = identifier.module_for_name(module_name)
     return modules.Module(fields_module)