Example #1
    def fetch(self, params, offset=None, limit=None):
        results = []
        if params.get('country'):
            # the portal expects the country filter as a JSON-encoded map
            params['country_json'] = '{"0":"' + params['country'] + '"}'
        params.update(self.params)  # merge in the connector's default query params
        resp = requests.get(self.URL, params=params)
        soup = Soup(resp.text, 'html.parser')
        contents = soup.findAll('ul', {'class': 'searchResults'})
        if not contents:
            return results, 0

        content = contents[0]
        for item in content.findAll('li', {'class': ['searchResultItem']}):
            itemcontent = item.find(
                'div', {'class': ['searchResultItem_content', 'media_body']})
            urlcontent = item.find('div',
                                   {'class': 'searchResultItem_download'})
            datecontent = item.find('span', {'class': 'searchResultItem_date'})
            title = itemcontent.find('a').get_text()
            pdfurl = urlcontent.find('a')['href']
            raw_date = datecontent.find('b').get_text()  # 4 July 2018
            date = datetime.datetime.strptime(raw_date, '%d %B %Y')
            data = Lead(
                # FIXME: use proper key
                id=pdfurl,
                title=title.strip(),
                published_on=date.date(),
                url=pdfurl,
                source='UNHCR Portal',
                source_type='',
                website='data2.unhcr.org')
            results.append(data)

        return results, len(results)
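These connector snippets all assume the same surrounding setup. A minimal sketch of the imports and the call they rely on (the `UnhcrPortal` class name is hypothetical; `Lead` is the host project's model):

    import datetime

    import requests
    from bs4 import BeautifulSoup as Soup  # the snippets alias BeautifulSoup as `Soup`

    connector = UnhcrPortal()  # hypothetical connector defining URL, params and fetch()
    leads, count = connector.fetch({'country': 'Nepal'})  # country value format assumed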
Example #2
 def get_queryset(self):
     # TODO: Optimize this queryset
     return Lead.get_for(self.request.user).select_related(
         'project',
         'project__analysis_framework',
         'project__analysis_framework__organization',
     )
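select_related here turns the project → analysis_framework → organization foreign-key chain into SQL joins, so the related objects come back with the same query. A sketch of the access pattern it optimises (`user` stands in for request.user):

    queryset = Lead.get_for(user).select_related(
        'project__analysis_framework__organization',  # intermediate FKs are joined too
    )
    for lead in queryset:
        # served from the joined row instead of three extra queries per lead
        org = lead.project.analysis_framework.organization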
Example #3
    def fetch(self, params, offset=None, limit=None):
        results = []
        resp = requests.get(self.URL, params=params)
        soup = Soup(resp.text, 'html.parser')
        contents = soup.findAll('div', {'class': 'wrapper-type'})
        if not contents:
            return results, 0

        content = contents[0]
        for item in content.findAll('div', {'class': 'views-row'}):
            bottomcontent = item.find('div', {'class': 'content-bottom'})
            topcontent = item.find('div', {'class': 'content-top'})
            date = topcontent.find('span', {'class': 'updated-date'}).text
            date = datetime.datetime.strptime(date, '%d/%m/%Y')
            title = topcontent.find('div', {'class': 'field-item'}).text
            link = bottomcontent.find('div', {'class': 'field-item'}).find('a')
            data = Lead(
                # FIXME: use proper key
                id=link['href'],
                title=title.strip(),
                published_on=date.date(),
                url=link['href'],
                source='Briefing Notes',
                source_type=Lead.WEBSITE,
                website='www.acaps.org/special-reports')
            results.append(data)

        return results, len(results)
Example #4
 def fetch(self, params, page=None, limit=None):
     country = params.get('country')
     if not country:
         return [], 0
     results = []
     resp = requests.get(self.URL)
     soup = Soup(resp.text, 'html.parser')
     content = soup.find('tbody')
     for row in content.findAll('tr'):
         elem = row.find('a')
         name = elem.get_text()
         name = re.sub(r'\(.*\)', '', name)  # strip parenthesised text; the parens must be escaped
         title = row.findAll('td')[-1].get_text()
         if name.strip() == country.strip():
             # add as lead
             url = elem['href']
             if url[0] == '/':  # means relative path
                 url = self.website + url
             data = Lead(title=title.strip(),
                         url=url,
                         source='PDNA portal',
                         source_type=Lead.WEBSITE,
                         website=self.website)
             results.append(data)
     return results, len(results)
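The original line used `re.sub('(.*)', '', name)`, whose unescaped parentheses form a capture group that matches the whole string and blanks the name. With the parentheses escaped, only a parenthesised suffix is stripped:

    import re

    name = 'Nepal (2015)'  # hypothetical row label
    print(re.sub(r'\(.*\)', '', name).strip())  # -> 'Nepal'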
Example #5
    def fetch(self, params, page=None, limit=None):
        results = []
        if not params or not params.get('feed-url'):
            return results, 0

        feed = feedparser.parse(params['feed-url'])

        title_field = params.get('title-field')
        date_field = params.get('date-field')
        source_field = params.get('source-field')
        url_field = params.get('url-field')
        website = params.get('website')

        for entry in feed.entries:
            title = title_field and entry.get(title_field)
            date = date_field and entry.get(date_field)
            source = source_field and entry.get(source_field)
            url = url_field and entry.get(url_field)

            data = Lead(
                title=title,
                published_on=date,
                source=source,
                url=url,
                website=website,
                source_type=Lead.RSS,
            )

            results.append(data)

        # return (results, count) like the other connectors' fetch()
        return results, len(results)
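A sketch of the params this RSS connector expects; the mapping values shown are common feedparser entry keys, and the feed URL is a placeholder:

    params = {
        'feed-url': 'https://example.org/feed.xml',  # placeholder feed
        'title-field': 'title',
        'date-field': 'published',
        'source-field': 'author',
        'url-field': 'link',
        'website': 'example.org',
    }
    leads, count = connector.fetch(params)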
Example #6
    def get_leads(self, params) -> Tuple[List[Lead], int]:
        def _parse_date(date_raw) -> Union[None, datetime.date]:
            # normalise date/datetime/str input to a plain date
            if isinstance(date_raw, datetime.datetime):  # check subclass first
                return date_raw.date()
            elif isinstance(date_raw, datetime.date):
                return date_raw
            published_on = str_to_date(date_raw)
            if published_on:
                return published_on.date()

        leads_data, total_count = self.fetch(copy.deepcopy(params))
        if not leads_data:
            return [], total_count

        organization_search = OrganizationSearch([
            label for d in leads_data for label in [d['source'], d['author']]
        ])

        leads = []
        for ldata in leads_data:
            published_on = _parse_date(ldata['published_on'])
            lead = Lead(
                id=ldata.get('id', random_key()),
                title=ldata['title'],
                published_on=published_on,
                url=ldata['url'],
                source_raw=ldata['source'],
                author_raw=ldata['author'],
                source=organization_search.get(ldata['source']),
                author=organization_search.get(ldata['author']),
                source_type=ldata['source_type'],
            )

            if ldata.get('author') is not None:
                lead._authors = list(
                    filter(None, [organization_search.get(ldata['author'])]))

            # Add emm info
            if ldata.get('emm_triggers') is not None:
                lead._emm_triggers = ldata['emm_triggers']
            if ldata.get('emm_entities') is not None:
                lead._emm_entities = ldata['emm_entities']

            leads.append(lead)

        return leads, total_count
Example #7
    def get_queryset(self):
        leads = Lead.get_for(self.request.user)

        lead_id = self.request.GET.get('similar')
        if lead_id:
            similar_lead = Lead.objects.get(id=lead_id)
            leads = leads.filter(project=similar_lead.project).annotate(
                similarity=TrigramSimilarity('title', similar_lead.title)
            ).filter(similarity__gt=0.3).order_by('-similarity')
        return leads
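TrigramSimilarity comes from django.contrib.postgres.search and needs PostgreSQL's pg_trgm extension. A minimal migration sketch to enable it, assuming the project already ships django.contrib.postgres:

    from django.contrib.postgres.operations import TrigramExtension
    from django.db import migrations


    class Migration(migrations.Migration):
        dependencies = []  # point this at the app's previous migration
        operations = [TrigramExtension()]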
Example #8
    def fetch(self, params, offset=None, limit=None):
        results = []

        # Example: http://apidoc.rwlabs.org/#filter

        post_params = {}
        post_params['fields'] = {
            'include': ['url_alias', 'title', 'date.original',
                        'source', 'source.homepage']
        }

        if params.get('country'):
            post_params['filter'] = {
                'field': 'country.iso3',
                'value': params['country'],
            }

        if params.get('search'):
            post_params['query'] = {
                'value': params['search'],
                'fields': ['title'],
                'operator': 'AND',
            }

        if offset:
            post_params['offset'] = offset
        if limit:
            post_params['limit'] = limit

        post_params['sort'] = ['date.original:desc', 'title:asc']

        resp = requests.post(self.URL, json=post_params).json()
        count = resp['totalCount']

        for datum in resp['data']:
            fields = datum['fields']
            lead = Lead(
                id=str(datum['id']),
                title=fields['title'],
                published_on=fields['date']['original'],
                url=fields['url_alias'],
                source=fields['source'][0]['name'],
                website='www.reliefweb.int',
            )
            results.append(lead)

        return results, count
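A sketch of paging through this ReliefWeb connector with the offset/limit parameters it already supports (the connector instance and process() consumer are hypothetical):

    offset, limit = 0, 20
    while True:
        leads, total = connector.fetch(
            {'country': 'NPL', 'search': 'flood'},  # country filter expects ISO3
            offset=offset, limit=limit,
        )
        process(leads)  # hypothetical consumer
        offset += limit
        if offset >= total:
            break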
Example #9
    def has_permission(self, request, view):
        project_id = view.kwargs.get('project_id')
        lead_id = view.kwargs.get('lead_id')
        entry_id = view.kwargs.get('entry_id')
        analysis_pillar_id = view.kwargs.get('analysis_pillar_id')

        if project_id:
            return Project.get_for_member(
                request.user).filter(id=project_id).exists()
        elif lead_id:
            return Lead.get_for(request.user).filter(id=lead_id).exists()
        elif entry_id:
            return Entry.get_for(request.user).filter(id=entry_id).exists()
        elif analysis_pillar_id:
            return AnalysisPillar.objects.filter(
                analysis__project__projectmembership__member=request.user,
                id=analysis_pillar_id).exists()
        return True
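A sketch of how a DRF view would attach this permission (the view and permission class names here are hypothetical):

    from rest_framework import viewsets

    class LeadViewSet(viewsets.ModelViewSet):
        # runs has_permission() above before dispatching the request
        permission_classes = [ProjectResourcePermission]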
Example #10
 def fetch(self, params, page=None, limit=None):
     results = []
     resp = requests.get(self.URL, params=params)
     soup = Soup(resp.text, 'html.parser')
     contents = soup.find('table').find('tbody').findAll('tr')
     for row in contents:
         tds = row.findAll('td')
         title = tds[0].get_text().replace('_', ' ')
         date = tds[1].find('span').attrs['content'][:10]  # keep only the YYYY-MM-DD part
         date = datetime.datetime.strptime(date, '%Y-%m-%d')
         url = tds[0].find('a').attrs['href']
         data = Lead(
             title=title.strip(),
             published_on=date.date(),
             url=url,
             source="Research Resource Center",
             source_type=Lead.WEBSITE,
             website=self.URL
         )
         results.append(data)
     return results, len(results)
Example #11
    def fetch(self, params, page=None, limit=None):
        results = []
        if not params or not params.get('feed-url'):
            return results, 0

        r = requests.get(params['feed-url'])
        xml = etree.fromstring(r.content)
        items = xml.findall('channel/item')

        title_field = params.get('title-field')
        date_field = params.get('date-field')
        source_field = params.get('source-field')
        url_field = params.get('url-field')
        website_field = params.get('website-field')

        for item in items:
            def get_field(field):
                if not field:
                    return ''
                element = item.find(field)
                # guard against feeds that omit the tag entirely
                return element.text if element is not None else ''
            title = get_field(title_field)
            date = get_field(date_field)
            source = get_field(source_field)
            url = get_field(url_field)
            website = get_field(website_field)

            data = Lead(
                # FIXME: use proper key
                id=url,
                title=title,
                published_on=date,
                source=source,
                url=url,
                website=website,
                source_type=Lead.RSS,
            )
            results.append(data)

        return results, len(results)
Example #12
    def get(self, request, uuid=None, filename=None):
        queryset = File.objects.prefetch_related('lead_set')
        file = get_object_or_404(queryset, uuid=uuid)
        user = request.user
        leads_pk = file.lead_set.values_list('pk', flat=True)

        if (
            file.is_public or
            Lead.get_for(user).filter(pk__in=leads_pk).exists() or
            Entry.get_for(user).filter(image=file).exists() or
            Entry.get_for(user).filter(
                image_raw=request.build_absolute_uri(
                    reverse('file', kwargs={'file_id': file.pk}),
                ),
            ).exists()
            # TODO: Add Profile
        ):
            if file.file:
                return redirect(request.build_absolute_uri(file.file.url))
            return response.Response(
                {'error': "File doesn't exist"},
                status=status.HTTP_404_NOT_FOUND,
            )
        return response.Response(
            {'error': 'Access Forbidden, Contact Admin'},
            status=status.HTTP_403_FORBIDDEN,
        )
Example #13
 def fetch(self, params, page=None, limit=None):
     results = []
     url = self.URL
     if params.get('country'):
         url = self.URL + '/loc/' + params['country']
     resp = requests.get(url)  # the country filter is already encoded in the URL path
     soup = Soup(resp.text, 'html.parser')
     contents = soup.find('div', {'id': 'content'}).find('tbody')
     for row in contents.findAll('tr'):
         tds = row.findAll('td')
         title = tds[0].find('a').get_text().strip()
         datestr = tds[3].get_text().strip()
         date = datetime.strptime(datestr, '%m/%d/%Y')
         url = tds[4].find('a')['href']
         data = Lead(
             title=title,
             published_on=date.date(),
             url=url,
             source='Humanitarian Response',
             website=self.URL,
             source_type=Lead.WEBSITE
         )
         results.append(data)
     return results, len(results)
Example #14
    def fetch(self, params, page=None, limit=None):
        results = []
        if page:
            params['page'] = page
        resp = requests.get(self.URL, params=params)

        soup = Soup(resp.text, 'html.parser')
        contents = soup.find('div', {'class': 'view-content'})
        if not contents:
            return results, len(results)
        # iterate and get leads
        for row in contents.findAll('div', {'class': 'views-row'}):
            content = row.find('h3').find('a')
            title = content.get_text()
            url = content['href']
            data = Lead(
                title=title.strip(),
                url=url,
                source='WFP Assessments',
                source_type=Lead.WEBSITE,
                website='www.wfp.org'
            )
            results.append(data)
        return results, len(results)
Example #15
 def get_queryset(self):
     return Lead.get_for(self.request.user)