def detect_by_url(self, row):
     '''Match *row* against resources and community resources by URL hash.

     Every match is recorded in ``self.resources`` /
     ``self.community_resources``; if nothing matches, an error is logged.
     '''
     target_url = row['url']
     url_digest = hash_url(target_url)
     matched = 0
     # Dataset resources whose embedded resource carries this URL hash.
     for dataset in Dataset.objects.filter(resources__urlhash=url_digest):
         self.resources.append({
             'dataset': dataset,
             'resource': get_by(dataset.resources, 'urlhash', url_digest),
             'data': row,
         })
         matched += 1
     # Standalone community resources with the same URL hash.
     for community_resource in CommunityResource.objects.filter(urlhash=url_digest):
         self.community_resources.append({
             'resource': community_resource,
             'data': row,
         })
         matched += 1
     if not matched:
         log.error('No resource found by url',
                   extra={
                       'hashed_url': url_digest,
                       'url': target_url
                   })
Esempio n. 2
0
 def detect_download_objects(self):
     '''Dispatch each row to detection by resource id or by hashed URL.

     Rows without a ``url`` key are skipped.  URLs matching
     ``LATEST_URL_REGEX`` carry a resource id in the first capture group
     and are resolved by id; all others fall back to the URL-hash lookup.
     '''
     for entry in self.rows:
         if 'url' not in entry:
             continue
         match = re.match(LATEST_URL_REGEX, entry['url'])
         if match and match.group(1):
             self.detect_by_resource_id(match.group(1), entry)
         else:
             self.detect_by_hashed_url(hash_url(entry['url']), entry)
Esempio n. 3
0
    def handle_downloads(self, row, day):
        """Count one download for the resource matching ``row['url']``.

        The resource is looked up by URL hash, first among embedded dataset
        resources, then among community resources.  A match gets its
        download counted for *day* and its view metrics recomputed and
        persisted.  Rows may nest further rows under ``subtable``; those
        are handled recursively with the same *day*.
        """
        if 'url' in row:
            try:
                hashed_url = hash_url(row['url'])
                data = (
                    Dataset.objects(resources__urlhash=hashed_url).first()
                    or
                    CommunityResource.objects(urlhash=hashed_url).first()
                )
                if isinstance(data, Dataset):
                    dataset = data
                    resource = get_by(dataset.resources, 'urlhash', hashed_url)
                    log.debug('Found resource download: %s', resource.url)
                    self.count(resource, day, row)
                    metric = ResourceViews(resource)
                    metric.compute()
                    # Use the MongoDB positional operator ($) so the update
                    # targets the embedded resource matched by the query.
                    cmd = 'set__resources__S__metrics__{0}'.format(metric.name)
                    qs = Dataset.objects(id=dataset.id,
                                         resources__id=resource.id)
                    qs.update(**{cmd: metric.value})
                    if dataset.organization:
                        OrgResourcesDownloads(dataset.organization).compute()
                elif isinstance(data, CommunityResource):
                    resource = data
                    log.debug('Found community resource download: %s',
                              resource.url)
                    self.count(resource, day, row)
                    metric = CommunityResourceViews(resource)
                    metric.compute()
                    resource.metrics[metric.name] = metric.value
                    resource.save()
            # Narrowed from a bare ``except:`` so SystemExit and
            # KeyboardInterrupt still propagate; log the failure and keep
            # processing the remaining rows (best-effort counting).
            except Exception:
                log.exception('Unable to count download for %s', row['url'])
        if 'subtable' in row:
            for subrow in row['subtable']:
                self.handle_downloads(subrow, day)
Esempio n. 4
0
 def url_exists(cls, url):
     '''Tell whether a document with the given URL is already stored.'''
     return bool(cls.objects(urlhash=hash_url(url)).count())
Esempio n. 5
0
 def clean(self):
     '''Auto populate urlhash from url'''
     # NOTE: the string above was previously placed *after* the super()
     # call, making it a dead no-op statement instead of a docstring.
     super(Reuse, self).clean()
     # Recompute the hash when it is missing or the url field changed.
     if not self.urlhash or 'url' in self._get_changed_fields():
         self.urlhash = hash_url(self.url)
Esempio n. 6
0
 def clean(self):
     '''Keep urlhash in sync with the url field during validation.'''
     super(ResourceMixin, self).clean()
     # Short-circuit: _get_changed_fields() is only consulted when a
     # hash already exists.
     needs_rehash = not self.urlhash or 'url' in self._get_changed_fields()
     if needs_rehash:
         self.urlhash = hash_url(self.url)
Esempio n. 7
0
 def url_exists(cls, url):
     '''Return True when at least one stored document matches this URL.'''
     matches = cls.objects(urlhash=hash_url(url))
     return matches.count() > 0
Esempio n. 8
0
 def clean(self):
     '''Auto populate urlhash from url'''
     # Mirror the original short-circuit: only consult changed fields
     # when a hash is already present.
     stale = not self.urlhash
     if not stale:
         stale = 'url' in self._get_changed_fields()
     if stale:
         self.urlhash = hash_url(self.url)
     super(Reuse, self).clean()