def _process_data(self, data, processor_id, reference_mapping):
    """Extract one attribute from ``data`` and post-process it if configured.

    ``reference_mapping[processor_id]`` is looked up first and passed as
    ``attrib`` to ``self._get_from_model_data``; a failed lookup propagates
    to the caller.  If ``self._processors`` holds a truthy entry for
    ``processor_id``, the extracted value is passed through its ``process``
    method and that result is returned.  Otherwise the extracted value is
    returned unchanged.
    """
    logger.debug('processor_id=%s' % (processor_id,))
    source_attrib = reference_mapping[processor_id]
    raw_value = self._get_from_model_data(data, attrib=source_attrib)
    handler = self._processors.get(processor_id)
    # Guard clause: no (or falsy) processor means the raw value is the result.
    if not handler:
        return raw_value
    return handler.process(raw_value)
Example #2
0
 def _process_data(self, data, processor_id, reference_mapping):
     """Return the value for ``processor_id`` taken from ``data``.

     The attribute to read is ``reference_mapping[processor_id]``; it is
     handed to ``self._get_from_model_data`` as ``attrib``.  When
     ``self._processors`` has a truthy entry under ``processor_id`` the
     extracted value is run through that entry's ``process`` method,
     otherwise it is returned as extracted.
     """
     logger.debug('processor_id=%s' % (processor_id, ))
     attrib_name = reference_mapping[processor_id]
     value = self._get_from_model_data(data, attrib=attrib_name)
     handler = self._processors.get(processor_id)
     return handler.process(value) if handler else value
 def get_data_from(self, url):
     """Fetch ``url`` and return its body after sanitizing.

     A falsy ``url`` short-circuits to an empty list without any network
     access.  Otherwise a request carrying an ``Authorization: Basic ...``
     header built from ``self._auth_string`` is sent, the whole response
     body is read, and the result of ``self._data_sanitizer.clean`` on that
     body is returned.  Errors raised by ``urllib2`` propagate to the caller.

     NOTE(review): ``self._auth_string`` is presumably already base64
     encoded, since it is inserted verbatim -- confirm against the caller.
     """
     logger.debug(url)
     if not url:
         return []
     request = urllib2.Request(url)
     request.add_header("Authorization", "Basic %s" % self._auth_string)
     result = urllib2.urlopen(request)
     logger.debug("Result found:")
     try:
         body = result.read()
     finally:
         # Release the connection even when reading fails; the response
         # object is closed explicitly rather than left to the collector.
         result.close()
     return self._data_sanitizer.clean(body)
 def extract_model_data_from(self, data):
     """Split ``data`` into identifying fields and plain data fields.

     Every key of ``self._model_data_mapping`` and of
     ``self._model_id_mapping`` is resolved through ``self._process_data``
     with the matching mapping as ``reference_mapping``.  The entries of
     ``self._model_default`` are then merged over the id dictionary.  A
     ``None`` default is replaced by an empty dict on the instance first.

     Returns:
         A ``(model_ids, model_data)`` tuple of dictionaries.
     """
     if self._model_default is None:
         self._model_default = {}
     logger.debug(
         "id_mapping: %s \n data_mapping: %s, \n, default: %s, processors:%s"
         % (self._model_id_mapping, self._model_data_mapping,
            self._model_default, self._processors))

     # Data fields are resolved before id fields, as in the mapping order.
     model_data = {}
     for attribute in self._model_data_mapping:
         model_data[attribute] = self._process_data(
             data,
             processor_id=attribute,
             reference_mapping=self._model_data_mapping)

     model_ids = {}
     for attribute in self._model_id_mapping:
         model_ids[attribute] = self._process_data(
             data,
             processor_id=attribute,
             reference_mapping=self._model_id_mapping)

     # Defaults win over extracted ids on key collisions.
     model_ids.update(self._model_default)
     logger.debug("Model_info: \n %s \n %s" % (model_ids, model_data))
     return model_ids, model_data
Example #5
0
 def get_data_from(self, url):
     """Download ``url`` and return the sanitized response body.

     Returns ``[]`` immediately when ``url`` is falsy.  Otherwise the
     request is sent with an ``Authorization: Basic ...`` header built from
     ``self._auth_string``, the body is read in full, and the value
     returned by ``self._data_sanitizer.clean`` for that body is returned.
     Exceptions raised by ``urllib2`` are not handled here.

     NOTE(review): ``self._auth_string`` is inserted verbatim; presumably
     it is already base64 encoded -- confirm against the caller.
     """
     logger.debug(url)
     if not url:
         return []
     request = urllib2.Request(url)
     request.add_header("Authorization", "Basic %s" % self._auth_string)
     result = urllib2.urlopen(request)
     logger.debug("Result found:")
     try:
         body = result.read()
     finally:
         # Always close the response so the underlying socket is released,
         # including when ``read`` raises.
         result.close()
     return self._data_sanitizer.clean(body)
 def _clean_for_reading_as_json(self, data):
     """Rewrite the raw text ``data`` so it can be handed to a JSON parser.

     Steps, in order: remove every substring listed in
     ``self._discard_values``; replace ``self`` with ``this``; drop the
     space before colons; remove newlines; remove every backslash-u
     sequence; strip surrounding whitespace.

     Returns:
         The rewritten string.
     """
     logger.debug("cleaning data")
     logger.debug('Discard%s' % (self._discard_values,))
     for discard_value in self._discard_values:
         data = data.replace(discard_value, '')
     data = data.replace("self", "this")
     data = data.replace(" :", ":")
     data = data.replace('\n', '')
     # The backslash must be escaped: a bare '\u' literal is a truncated
     # unicode escape and fails to compile on Python 3 (and on Python 2
     # with unicode_literals).  '\\u' is the same two characters.
     data = data.replace('\\u', '')
     return data.strip()
Example #7
0
 def _clean_for_reading_as_json(self, data):
     """Normalize the text ``data`` ahead of JSON parsing.

     Every entry of ``self._discard_values`` is removed from the text
     first.  Then a fixed sequence of rewrites is applied: ``self`` becomes
     ``this``, the space before a colon is dropped, newlines are removed,
     and backslash-u sequences are removed.  The result is returned with
     surrounding whitespace stripped.
     """
     logger.debug("cleaning data")
     logger.debug('Discard%s' % (self._discard_values, ))
     for discard_value in self._discard_values:
         data = data.replace(discard_value, '')
     data = data.replace("self", "this")
     data = data.replace(" :", ":")
     data = data.replace('\n', '')
     # Escaped on purpose: an unescaped '\u' is an incomplete unicode
     # escape and a SyntaxError on Python 3; '\\u' matches the same text.
     data = data.replace('\\u', '')
     return data.strip()
 def clean(self, data):
     """Parse ``data`` as JSON after cleaning and return the wanted part.

     The text goes through ``self._clean_for_reading_as_json`` and then
     ``json.loads``.  Without a truthy ``self._data_key`` the whole parsed
     value is returned.  With one, the value stored under that key is
     returned, or ``None`` when it is missing or falsy.

     Any exception raised along the way is logged and an empty list is
     returned instead.
     """
     try:
         readable_data = self._clean_for_reading_as_json(data)
         logger.debug("cleaned data ----->%s" % (readable_data,))
         logger.debug('eval the string to get dict')
         parsed = json.loads(readable_data)
         if not self._data_key:
             return parsed
         logger.debug('get the data')
         selected = parsed.get(self._data_key)
         # Missing and empty payloads are both reported as None.
         return selected if selected else None
     except Exception as e:
         logger.debug(data)
         logger.error(e)
         return []
Example #9
0
 def clean(self, data):
     """Turn the raw text ``data`` into parsed JSON, or a part of it.

     ``self._clean_for_reading_as_json`` prepares the text and
     ``json.loads`` parses it.  If ``self._data_key`` is truthy, only the
     entry under that key is returned, with a missing or falsy entry
     reported as ``None``; otherwise the full parsed value is returned.

     On any exception the original ``data`` and the error are logged and
     ``[]`` is returned.
     """
     try:
         readable_data = self._clean_for_reading_as_json(data)
         logger.debug("cleaned data ----->%s" % (readable_data, ))
         logger.debug('eval the string to get dict')
         document = json.loads(readable_data)
         if self._data_key:
             logger.debug('get the data')
             payload = document.get(self._data_key)
             if payload:
                 return payload
             return None
         return document
     except Exception as e:
         logger.debug(data)
         logger.error(e)
         return []
Example #10
0
 def extract_model_data_from(self, data):
     """Return ``(model_ids, model_data)`` dictionaries built from ``data``.

     Each key of ``self._model_data_mapping`` and ``self._model_id_mapping``
     is resolved by ``self._process_data`` using the corresponding mapping
     as ``reference_mapping``.  ``self._model_default`` (set to ``{}`` on
     the instance when it is ``None``) is merged into the id dictionary
     last, so its entries override extracted ids.
     """
     if self._model_default is None:
         self._model_default = {}
     logger.debug(
         "id_mapping: %s \n data_mapping: %s, \n, default: %s, processors:%s"
         % (self._model_id_mapping, self._model_data_mapping,
            self._model_default, self._processors))

     def resolve(mapping):
         # One processed value per key of the given mapping.
         return dict(
             (attribute,
              self._process_data(data,
                                 processor_id=attribute,
                                 reference_mapping=mapping))
             for attribute in mapping)

     model_data = resolve(self._model_data_mapping)
     model_ids = resolve(self._model_id_mapping)
     model_ids.update(self._model_default)
     logger.debug("Model_info: \n %s \n %s" % (model_ids, model_data))
     return model_ids, model_data
Example #11
0
 def _create_model_using(self, data, parent=None):
     """Create or update one model from ``data`` and descend into children.

     The ids and field values come from
     ``self._data_extractor.extract_model_data_from``.  The model is the
     result of ``self._model.get_or_create(**ids).update(**values)``.  With
     a truthy ``parent`` and a non-empty ``self._has_relationship``, the
     parent attribute of that name is called with the model.  Each entry of
     ``self.children`` then has ``end_node.execute`` run with the same
     ``data`` and the model as parent.

     Any exception is logged at error level and swallowed; nothing is
     returned.

     NOTE(review): assumes ``update`` returns the model instance -- confirm.
     """
     logger.debug("model_data %s" % (data,))
     try:
         extracted = self._data_extractor.extract_model_data_from(data)
         model_ids, model_data = extracted
         record = self._model.get_or_create(**model_ids)
         model = record.update(**model_data)
         logger.debug(model)
         if parent:
             if self._has_relationship != '':
                 logger.debug('%s ----- %s ----> %s'
                              % (parent, self._has_relationship, model))
                 relate = getattr(parent, self._has_relationship)
                 relate(model)
         for child in self.children:
             child.end_node.execute(data=data, parent=model)
     except Exception as e:
         logger.error(e)
Example #12
0
    def execute(self, data=None, parent=None):
        """Fetch the data for this node and build a model from each item.

        The URL comes from ``self._url_extractor.get_next_url(data)`` and is
        fetched through ``self._informer.using(self._data_sanitizer)``.  A
        ``None`` response ends the call immediately.  A list response
        yields one ``self._create_model_using`` call per element; anything
        else yields a single call.  Finally, a truthy ``self.next`` has
        ``end_node.execute()`` invoked with no arguments.
        """
        logger.debug('begin execute')
        url = self._url_extractor.get_next_url(data)
        fetcher = self._informer.using(self._data_sanitizer)
        response = fetcher.get_data_from(url)
        if response is None:
            logger.debug("No data")
            return
        if not isinstance(response, list):
            self._create_model_using(response, parent=parent)
        else:
            logger.debug("Is list")
            for item in response:
                self._create_model_using(item, parent)
        if self.next:
            self.next.end_node.execute()
Example #13
0
    def _create_model_using(self, data, parent=None):
        """Build the model described by ``data``, link it, and run children.

        ``self._data_extractor.extract_model_data_from`` supplies the id and
        value dictionaries, which feed
        ``self._model.get_or_create(**ids).update(**values)``.  If ``parent``
        is truthy and ``self._has_relationship`` is not the empty string,
        the parent attribute named by it is called with the model.  Every
        child in ``self.children`` is then executed through its
        ``end_node`` with ``data`` and the model as parent.

        All exceptions are caught and logged at error level; the method
        returns ``None`` either way.

        NOTE(review): assumes ``update`` returns the model instance --
        confirm.
        """
        logger.debug("model_data %s" % (data, ))
        try:
            ids_and_values = self._data_extractor.extract_model_data_from(
                data)
            model_ids, model_data = ids_and_values
            found = self._model.get_or_create(**model_ids)
            model = found.update(**model_data)
            logger.debug(model)
            if parent:
                if self._has_relationship != '':
                    logger.debug('%s ----- %s ----> %s' %
                                 (parent, self._has_relationship, model))
                    attach = getattr(parent, self._has_relationship)
                    attach(model)
            for child in self.children:
                child.end_node.execute(data=data, parent=model)
        except Exception as e:
            logger.error(e)
Example #14
0
 def execute(self, data=None, parent=None):
     """Run this node: fetch its data, create models, then run the next node.

     ``self._url_extractor.get_next_url(data)`` provides the URL, which is
     fetched via ``self._informer.using(self._data_sanitizer)``.  Nothing
     further happens when the response is ``None``.  For a list response
     ``self._create_model_using`` is called once per element; for any
     other response it is called once with the response itself.  When
     ``self.next`` is truthy its ``end_node.execute()`` is called without
     arguments.
     """
     logger.debug('begin execute')
     url = self._url_extractor.get_next_url(data)
     source = self._informer.using(self._data_sanitizer)
     response = source.get_data_from(url)
     if response is None:
         logger.debug("No data")
         return
     if isinstance(response, list):
         logger.debug("Is list")
         for entry in response:
             self._create_model_using(entry, parent)
     else:
         self._create_model_using(response, parent=parent)
     following = self.next
     if following:
         following.end_node.execute()