コード例 #1
0
ファイル: handlers.py プロジェクト: lukerosiak/datacommons
    def read(self, request):
        """Return entities in the inclusive range [start, end], or a count.

        Query parameters:
            count: if truthy, return only {'count': n} (range params ignored).
            start, end: integer row bounds, inclusive; at most 10,000 rows.
            type: optional entity-type filter, bound as a SQL parameter.

        Returns a list of field dicts, a count dict, or rc.BAD_REQUEST with
        an explanatory message on invalid input.
        """
        count = request.GET.get('count', None)
        start = request.GET.get('start', None)
        end = request.GET.get('end', None)
        entity_type = request.GET.get('type', None)

        # Filter is parameterized: the clause carries a placeholder and the
        # value itself is passed separately to execute_top.
        where_clause = "where type = %s" if entity_type else ''
        type_params = [entity_type] if entity_type else []

        if count:
            return dict(count=execute_top(self.count_stmt % where_clause, *type_params)[0][0])

        # Guard clause: both bounds must be present before conversion.
        if start is None or end is None:
            error_response = rc.BAD_REQUEST
            error_response.write("Must specify valid start and end parameters.")
            return error_response

        try:
            start = int(start)
            end = int(end)
        except (TypeError, ValueError):  # was a bare except; only int() can fail here
            error_response = rc.BAD_REQUEST
            error_response.write("Must provide integers for start and end.")
            return error_response

        if end < start or end - start > 10000:
            error_response = rc.BAD_REQUEST
            error_response.write("Only 10,000 entities can be retrieved at a time.")
            return error_response

        # Second query param pair is (offset, limit) for the window.
        raw_result = execute_top(self.stmt % where_clause,
                                 *(type_params + [start, end - start + 1]))
        return [dict(zip(self.fields, row)) for row in raw_result]
コード例 #2
0
ファイル: handlers.py プロジェクト: dmc2015/datacommons
    def read(self, request, **kwargs):
        """Fetch candidates for an entity and attach a weekly amount timeline."""
        cycle = request.GET.get('cycle')
        entity_id = kwargs['entity_id']

        rows = execute_top(self.candidates_stmt, cycle, entity_id, cycle)
        candidates = [dict(zip(self.candidates_fields, row)) for row in rows]

        # One timeline query per candidate; this could be collapsed into a
        # single query (with slightly more complex code) if it proves slow.
        for candidate in candidates:
            timeline_rows = execute_top(self.timeline_stmt,
                                        candidate['candidate_id'], cycle)
            candidate['timeline'] = [amount for (week, amount) in timeline_rows]

        return candidates
コード例 #3
0
    def read(self, request, **kwargs):
        """Fetch candidates for an entity, each with a weekly amount timeline."""
        cycle = request.GET.get('cycle')

        candidates = [
            dict(zip(self.candidates_fields, row))
            for row in execute_top(self.candidates_stmt, cycle,
                                   kwargs['entity_id'], cycle)
        ]

        # Issues a separate timeline query per candidate; a single combined
        # query would be possible at the cost of more complex code.
        for cand in candidates:
            raw = execute_top(self.timeline_stmt, cand['candidate_id'], cycle)
            cand['timeline'] = [amount for week, amount in raw]

        return candidates
コード例 #4
0
ファイル: handlers.py プロジェクト: aburan28/datacommons
    def read(self, request):
        """Full-text search anchored at a (lat, lon) point.

        'query' is required; 'lat'/'lon' are parsed as floats and 'limit'
        defaults to 10.
        """
        query = request.GET['query']
        lat = float(request.GET.get('lat'))
        lon = float(request.GET.get('lon'))
        limit = int(request.GET.get('limit', '10'))

        # WKT points are "POINT(lon lat)" -- longitude comes first.
        point = "POINT(%s %s)" % (lon, lat)
        rows = execute_top(self.stmt, query_to_ft_sql(query), point, limit)

        return [dict(zip(self.fields, row)) for row in rows]
コード例 #5
0
ファイル: handlers.py プロジェクト: lukerosiak/datacommons
    def read(self, request):
        """Full-text entity search driven by the 'search' query parameter."""
        query = request.GET.get('search', None)
        if not query:
            error_response = rc.BAD_REQUEST
            error_response.write("Must include a query in the 'search' parameter.")
            return error_response

        # Split the decoded query on tsquery operator characters and AND
        # the resulting terms together.
        terms = re.split(r'[ &|!():*]+', unquote_plus(query))
        parsed_query = ' & '.join(terms)

        rows = execute_top(self.stmt, parsed_query)
        return [dict(zip(self.fields, row)) for row in rows]
コード例 #6
0
ファイル: handlers.py プロジェクト: aburan28/datacommons
    def read(self, request):
        """Full-text entity search, optionally restricted to one entity type.

        'search' is required; 'type' optionally narrows results.  Returns
        rc.BAD_REQUEST when the search term is missing.
        """
        query = request.GET.get('search')
        entity_type = request.GET.get('type')
        if not query:
            error_response = rc.BAD_REQUEST
            error_response.write("Must include a query in the 'search' parameter.")
            return error_response

        stmt = self.stmt
        if entity_type:
            # Appends a placeholder only; the value is bound by execute_top.
            stmt += '\n        and e.type = %s'

        # AND together the terms of the decoded query, split on tsquery
        # operator characters.
        parsed_query = ' & '.join(re.split(r'[ &|!():*]+', unquote_plus(query)))
        # Removed a dead assignment: `query_params = (parsed_query)` was not
        # even a tuple (missing comma) and was immediately overwritten.
        query_params = [x for x in (parsed_query, entity_type) if x]
        raw_result = execute_top(stmt, *query_params)

        return [dict(zip(self.fields, row)) for row in raw_result]
コード例 #7
0
ファイル: handlers.py プロジェクト: lukerosiak/datacommons
def get_totals(entity_id):
    """Build a mapping of cycle -> totals dict for the given entity.

    The totals statement takes the entity id in eleven positions; each
    result row is keyed by its first column (the cycle).
    """
    rows = execute_top(get_totals_stmt, *([entity_id] * 11))
    return dict(
        (row[0], dict(zip(EntityHandler.totals_fields, row[1:])))
        for row in rows
    )
コード例 #8
0
ファイル: handlers.py プロジェクト: dmc2015/datacommons
    def read(self, request):
        """Paginated full-text entity search with subtype/type filtering.

        Query parameters:
            search (required): full-text query; missing -> rc.BAD_REQUEST.
            per_page (default 10, max 25), page (default 1, min 1).
            subtype: one of a fixed whitelist; implies joins/filters below.
            seat/state/party: extra politician filters (whitelisted values).
            type: comma-separated entity types, used only when no subtype.

        Returns a dict with results, paging info, totals and metadata.
        """
        query = request.GET.get('search')
        if not query:
            error_response = rc.BAD_REQUEST
            error_response.write("Must include a query in the 'search' parameter.")
            return error_response

        stmt = self.stmt

        # Clamp paging: at most 25 per page, page numbers start at 1.
        per_page = min(int(request.GET.get('per_page', 10)), 25)
        page = max(int(request.GET.get('page', 1)), 1)
        start = (page - 1) * per_page
        end = start + per_page

        # AND together the decoded query's terms, split on tsquery operators.
        parsed_query = ' & '.join(re.split(r'[ &|!():*]+', unquote_plus(query)))
        where_filters = []
        extra_joins = []
        filters = {}

        # Subtype is validated against a whitelist before being used to build SQL.
        subtype_raw = request.GET.get('subtype', None)
        subtype = subtype_raw if subtype_raw in set(('contributors', 'lobbyists', 'politicians', 'industries', 'lobbying_firms', 'political_groups', 'other_orgs')) else None
        if subtype:
            if subtype == 'contributors':
                where_filters.append("e.type = 'individual' and mbim.is_contributor = 't'")
                extra_joins.append("left join matchbox_individualmetadata mbim on e.id = mbim.entity_id")
            elif subtype == 'lobbyists':
                where_filters.append("e.type = 'individual' and mbim.is_lobbyist = 't'")
                extra_joins.append("left join matchbox_individualmetadata mbim on e.id = mbim.entity_id")
            elif subtype == 'politicians':
                where_filters.append("e.type = 'politician'")

                # seat/state/party are only interpolated below after being
                # checked against whitelists, so injection is not possible.
                seat = request.GET.get('seat', None)
                seat = seat if seat in valid_seats else None

                state = request.GET.get('state', None)
                state = state if state in valid_states else None

                party = request.GET.get('party', None)
                party = party if party in valid_parties else None

                if state or seat or party:
                    pol_filters = ["state = '%s'" % state if state else None, "seat = '%s'" % seat if seat else None]
                    if party:
                        if party in ('R', 'D'):
                            pol_filters.append("party = '%s'" % party)
                        else:
                            pol_filters.append("party not in ('R', 'D')")
                    pol_where = " and ".join(filter(lambda x:x, pol_filters))
                    extra_joins.append("inner join (select distinct on (entity_id) * from matchbox_politicianmetadata where %s order by entity_id, cycle desc) mbpm on e.id = mbpm.entity_id" % pol_where)
            elif subtype == 'industries':
                where_filters.append("e.type = 'industry'")
            elif subtype == 'lobbying_firms':
                where_filters.append("e.type = 'organization' and mbom.lobbying_firm = 't'")
                extra_joins.append("left join (select distinct on (entity_id) * from matchbox_organizationmetadata order by entity_id, cycle desc) mbom on e.id = mbom.entity_id")
            elif subtype == 'political_groups':
                where_filters.append("e.type = 'organization'")
                extra_joins.append("inner join (select distinct on (entity_id) * from matchbox_entityattribute where namespace = 'urn:fec:committee') mbea on e.id = mbea.entity_id")
            elif subtype == 'other_orgs':
                where_filters.append("e.type = 'organization' and mbom.lobbying_firm = 'f'")
                where_filters.append("(coalesce(mbea.value, '') = '' or mbom.is_corporation = 't' or mbom.is_cooperative = 't' or mbom.is_corp_w_o_capital_stock = 't')")
                extra_joins.append("left join (select distinct on (entity_id) * from matchbox_entityattribute where namespace = 'urn:fec:committee') mbea on e.id = mbea.entity_id")
                extra_joins.append("left join (select distinct on (entity_id) * from matchbox_organizationmetadata order by entity_id, cycle desc) mbom on e.id = mbom.entity_id")
        else:
            # subtype implies type, so only use explicit type if there's not a subtype
            etype_raw = request.GET.get('type', None)
            if etype_raw:
                allowed_types = set(('organization', 'industry', 'individual', 'politician'))
                entity_type = [etype for etype in etype_raw.split(',') if etype in allowed_types]
                if entity_type:
                    where_filters.append("e.type in (%s)" % ','.join(["'%s'" % etype for etype in entity_type]))
                    filters['type'] = entity_type

        where_clause = "where %s" % (" and ".join(where_filters)) if where_filters else ""
        join_clause = " ".join(extra_joins)

        query_params = (parsed_query,)
        raw_result = execute_top(stmt.replace("WHERE", where_clause).replace("JOINS", join_clause), *query_params)

        # Pagination is applied in Python over the full result set.
        total = len(raw_result)
        results = [dict(zip(self.fields, row)) for row in raw_result[start:end]]

        # (Removed a leftover `print raw_result, results` debug statement that
        # dumped the whole result set to stdout on every request.)

        if total:
            # NOTE(review): ids are values returned by the database, quoted and
            # spliced into SQL via .replace() -- string-built SQL; consider
            # binding them as parameters instead.
            ids = ','.join(["'%s'" % row['id'] for row in results])

            totals = dict()
            for row in execute_top(search_totals_stmt.replace("%s", ids)):
                totals[row[0]] = dict(zip(EntityHandler.totals_fields, row[1:]))

            # grab the metadata
            entities = {e.id: e for e in Entity.objects.select_related().filter(id__in=[row['id'] for row in results])}

            for row in results:
                row['totals'] = totals.get(row['id'], None)

                # match metadata, but strip out year keys
                meta = entities[row['id']].metadata if row['id'] in entities else None
                row['metadata'] = {k: v for k, v in meta.items() if not (type(k) == int or k.isdigit())} if meta else None
                row['external_ids'] = [{'namespace': attr.namespace, 'id': attr.value} for attr in entities[row['id']].attributes.all()] if row['id'] in entities else None

        return {
            'results': results,
            'page': page,
            'per_page': per_page,
            'total': total,
            'pages': math.ceil(float(total) / per_page),
            'filters': filters
        }