Beispiel #1
0
 def bootstrap_pass_one(cls, model_class, xml_tag, id_attr="id", name_attr="name", skip_without=None):
     """Create one ``model_class`` record per element read from a gzipped dump.

     The dump path is looked up with ``Bootstrapper.get_xml_path(xml_tag)``
     and its elements are produced by ``Bootstrapper.iterparse``.  Each
     element is converted to a field dict with
     ``model_class.tags_to_fields``, stamped with a ``random`` value and
     passed to ``model_class.create``.  A progress line is printed for
     every record created.

     Args:
         cls: Not used by this method's body.
         model_class: Class providing ``tags_to_fields(element)`` and
             ``create(**data)``.
         xml_tag: Tag name handed to ``Bootstrapper.get_xml_path`` and
             ``Bootstrapper.iterparse``.
         id_attr: Attribute read from the created record for the
             ``id:`` slot of the progress line.
         name_attr: Attribute read from the created record for the
             trailing name slot of the progress line.
         skip_without: Optional iterable of field names; an element is
             skipped when any of them is missing or falsy in its field
             dict.

     Raises:
         peewee.DataError: Re-raised after the offending field dict and
             the traceback have been printed.
     """
     template = u"{} (Pass 1) (idx:{}) (id:{}) [{:.8f}]: {}"
     xml_path = Bootstrapper.get_xml_path(xml_tag)
     print(xml_path)
     with gzip.GzipFile(xml_path, "r") as file_pointer:
         iterator = Bootstrapper.iterparse(file_pointer, xml_tag)
         for i, element in enumerate(iterator):
             # Bound before the try so the error handler can always dump it,
             # even when tags_to_fields() itself is what failed.
             data = None
             try:
                 with systemtools.Timer(verbose=False) as timer:
                     data = model_class.tags_to_fields(element)
                     if skip_without and any(not data.get(key) for key in skip_without):
                         continue
                     element_id = element.get("id")
                     if element_id:
                         data["id"] = element_id
                     data["random"] = random.random()
                     document = model_class.create(**data)
                 message = template.format(
                     model_class.__name__.upper(),
                     i,
                     getattr(document, id_attr),
                     timer.elapsed_time,
                     getattr(document, name_attr),
                 )
                 print(message)
             except peewee.DataError:
                 pprint.pprint(data)
                 traceback.print_exc()
                 # Bare raise keeps the original traceback intact.
                 raise
Beispiel #2
0
 def bootstrap_pass_one(cls):
     """Insert one document per 'label' element of the gzipped labels dump.

     Elements come from ``Bootstrapper.iterparse`` filtered through
     ``Bootstrapper.clean_elements``.  Each one is turned into a document
     with ``cls.from_element`` and inserted through ``cls.objects.insert``.
     A progress line is printed per inserted document.  A
     ``mongoengine.errors.ValidationError`` is reported with a traceback
     and the loop moves on to the next element.
     """
     template = u'{} (Pass 1) {} [{}]: {}'
     with gzip.GzipFile(Bootstrapper.labels_xml_path, 'r') as gz_stream:
         elements = Bootstrapper.clean_elements(
             Bootstrapper.iterparse(gz_stream, 'label')
             )
         for element in elements:
             try:
                 # Only building and inserting the document is timed.
                 with systemtools.Timer(verbose=False) as stopwatch:
                     label = cls.from_element(element)
                     cls.objects.insert(label, load_bulk=False)
                 print(template.format(
                     cls.__name__.upper(),
                     label.discogs_id,
                     stopwatch.elapsed_time,
                     label.name,
                     ))
             except mongoengine.errors.ValidationError:
                 traceback.print_exc()
Beispiel #3
0
 def bootstrap_pass_one(
     cls,
     model_class,
     xml_tag,
     id_attr='id',
     name_attr='name',
     skip_without=None,
     ):
     """Populate ``model_class`` from the gzipped dump for ``xml_tag``.

     The dump location comes from ``Bootstrapper.get_xml_path(xml_tag)``;
     ``Bootstrapper.iterparse`` yields its elements.  For every element
     the field dict from ``model_class.tags_to_fields`` gets a ``random``
     value (and the element's ``id`` attribute when it is set), and is
     then handed to ``model_class.create``.  One progress line is printed
     per created record.

     Args:
         cls: Not used by this method's body.
         model_class: Class providing ``tags_to_fields(element)`` and
             ``create(**data)``.
         xml_tag: Tag name given to ``Bootstrapper.get_xml_path`` and
             ``Bootstrapper.iterparse``.
         id_attr: Name of the record attribute shown in the ``id:``
             slot of the progress line.
         name_attr: Name of the record attribute shown at the end of
             the progress line.
         skip_without: Optional iterable of field names; elements whose
             field dict lacks a truthy value for any of them are
             skipped.

     Raises:
         peewee.DataError: Propagated after the field dict and the
             traceback have been printed.
     """
     template = u'{} (Pass 1) (idx:{}) (id:{}) [{:.8f}]: {}'
     xml_path = Bootstrapper.get_xml_path(xml_tag)
     print(xml_path)
     with gzip.GzipFile(xml_path, 'r') as file_pointer:
         iterator = Bootstrapper.iterparse(file_pointer, xml_tag)
         for i, element in enumerate(iterator):
             # Defined ahead of the try block so the except branch can
             # print it regardless of where the failure happened.
             data = None
             try:
                 with systemtools.Timer(verbose=False) as timer:
                     data = model_class.tags_to_fields(element)
                     if skip_without and any(
                         not data.get(key) for key in skip_without
                         ):
                         continue
                     element_id = element.get('id')
                     if element_id:
                         data['id'] = element_id
                     data['random'] = random.random()
                     document = model_class.create(**data)
                 message = template.format(
                     model_class.__name__.upper(),
                     i,
                     getattr(document, id_attr),
                     timer.elapsed_time,
                     getattr(document, name_attr),
                     )
                 print(message)
             except peewee.DataError:
                 pprint.pprint(data)
                 traceback.print_exc()
                 # Re-raise the active exception with its traceback untouched.
                 raise