Example #1
def splash_request(target_url, headers=None, request_timeout=10):
    ''' Ask splash to render a page. '''
    # Avoid a mutable default argument, which would be shared across calls.
    if headers is None:
        headers = {}
    db_session = worker.get_session()
    splash_url = get_config(db_session, 'splash_url', required=True).value
    splash_user = get_config(db_session, 'splash_user', required=True).value
    splash_pass = get_config(db_session, 'splash_password',
                             required=True).value
    auth = (splash_user, splash_pass)
    splash_headers = {'content-type': 'application/json'}

    if 'user-agent' not in [header.lower() for header in headers.keys()]:
        headers['user-agent'] = USER_AGENT

    payload = {
        'url': target_url,
        'html': 1,
        'jpeg': 1,
        'har': 1,
        'history': 1,
        'timeout': request_timeout,
        'resource_timeout': 5,
        'headers': headers
    }

    splash_response = requests.post(urljoin(splash_url, 'render.json'),
                                    headers=splash_headers,
                                    json=payload,
                                    auth=auth)

    return splash_response
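
A minimal usage sketch for the function above (illustrative only; the worker/config plumbing is assumed to be initialized). The return value is the raw requests.Response from Splash, so the caller unpacks the JSON itself:

# Hypothetical caller.
response = splash_request('https://example.com', request_timeout=20)
response.raise_for_status()
rendered = response.json()
html = rendered.get('html')    # rendered page source
shot = rendered.get('jpeg')    # base64-encoded screenshot, per the payload flags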
Example #2
def splash_request(target_url,
                   headers=None,
                   request_timeout=None,
                   wait=1,
                   use_proxy=False):
    ''' Ask splash to render a page. '''
    # Avoid a mutable default argument, which would be shared across calls.
    if headers is None:
        headers = {}
    db_session = worker.get_session()
    splash_url = get_config(db_session, 'splash_url', required=True).value
    splash_user = get_config(db_session, 'splash_user', required=True).value
    splash_pass = get_config(db_session, 'splash_password',
                             required=True).value
    splash_user_agent = get_config(db_session,
                                   'splash_user_agent',
                                   required=True).value
    proxy = None

    if request_timeout is None:
        try:
            request_timeout = int(
                get_config(db_session, 'splash_request_timeout',
                           required=True).value)
        except (TypeError, ValueError):
            raise ScrapeException('Request timeout must be an integer: {}'
                                  .format(request_timeout))

    auth = (splash_user, splash_pass)
    splash_headers = {'content-type': 'application/json'}

    if 'user-agent' not in [header.lower() for header in headers.keys()]:
        headers['user-agent'] = splash_user_agent

    payload = {
        'url': target_url,
        'html': 1,
        'jpeg': 1,
        'har': 1,
        'history': 1,
        'wait': wait,
        'render_all': 1,
        'width': 1024,
        'height': 768,
        'timeout': request_timeout,
        'resource_timeout': 5,
        'headers': headers
    }

    # Use proxy if enabled
    if use_proxy:
        proxy = random_proxy(db_session)

    if proxy:
        payload['proxy'] = proxy

    splash_response = requests.post(urljoin(splash_url, 'render.json'),
                                    headers=splash_headers,
                                    json=payload,
                                    auth=auth)

    return splash_response
Example #3
    def index(self):
        '''
        Get QCR intents data.

        **Example Response**

        .. sourcecode:: json

            {
                "Google Maps":{
                    "intents":{
                        "geoloc":"@{{geoloc.lat}},{{geoloc.long}},12z",
                        "geobounds":"@{{geobounds.lat0}},{{geobounds.long0}},12z"
                    },
                    "hide":true,
                    "name":"Google Maps",
                    "url":"https://www.google.com/maps",
                    "desc":"Interactive maps",
                    "thumbnail":"googlemaps.png",
                    "icon":"googlemaps.png"
                },
                ...
            }

        :status 200: ok
        :status 401: authentication required
        :status 404: intents not found
        '''

        url = get_config(g.db, 'intents_url', required=True)
        username = get_config(g.db, 'intents_username', required=True)
        password = get_config(g.db, 'intents_password', required=True)

        if url is None or url.value.strip() == '':
            raise NotFound('Intents url is not configured.')

        if username is None or password is None:
            raise NotFound('Intents credentials not configured.')

        if username.value.strip() == '' or password.value.strip() == '':
            raise NotFound('Intents credentials not configured.')

        try:
            response = requests.get(
                url.value,
                auth=HTTPBasicAuth(username.value, password.value),
                verify=False,
                timeout=5
            )
        except requests.exceptions.RequestException:
            raise NotFound('Intents data could not be retrieved from server.')

        return jsonify(response.json())
Example #4
    def index(self):
        '''
        Get QCR intents data.

        **Example Response**

        .. sourcecode:: json

            {
                "Google Maps":{
                    "intents":{
                        "geoloc":"@{{geoloc.lat}},{{geoloc.long}},12z",
                        "geobounds":"@{{geobounds.lat0}},{{geobounds.long0}},12z"
                    },
                    "hide":true,
                    "name":"Google Maps",
                    "url":"https://www.google.com/maps",
                    "desc":"Interactive maps",
                    "thumbnail":"googlemaps.png",
                    "icon":"googlemaps.png"
                },
                ...
            }

        :status 200: ok
        :status 401: authentication required
        :status 404: intents not found
        '''

        url = get_config(g.db, 'intents_url', required=True)
        username = get_config(g.db, 'intents_username', required=True)
        password = get_config(g.db, 'intents_password', required=True)

        if url is None or url.value.strip() == '':
            raise NotFound('Intents url is not configured.')

        if username is None or password is None:
            raise NotFound('Intents credentials not configured.')

        if username.value.strip() == '' or password.value.strip() == '':
            raise NotFound('Intents credentials not configured.')

        try:
            response = requests.get(url.value,
                                    auth=HTTPBasicAuth(username.value,
                                                       password.value),
                                    verify=False,
                                    timeout=5)
        except requests.exceptions.RequestException:
            raise NotFound('Intents data could not be retrieved from server.')

        return jsonify(response.json())
Example #5
def handle_invitation_response(rsvp_uuid, response):
    """Process an incoming reply from the form generated from a mailed invitation UUID."""
    global server_conf
    server_conf = configuration.get_config('server')
    page_pieces.set_server_conf()
    person_responding = person.Person.find(database.find_rsvp(rsvp_uuid))
    # set up viewing as though the user has actually logged in
    access_permissions.Access_Permissions.setup_access_permissions(person_responding.link_id)
    # todo: also tell django that they are effectively logged in?
    event_responding = event.Event.find_by_id(person_responding.invitations[rsvp_uuid])
    if response == 'accept':
        event_responding.add_invitation_accepted([person_responding])
        makers_server.generate_page('accepted', person_responding.name(), event_responding.title)
    elif response == 'decline':
        event_responding.add_invitation_declined([person_responding])
        makers_server.generate_page('declined', person_responding.name(), event_responding.title)
    elif response == 'drop':
        event_responding.add_invitation_declined([person_responding])
        person_responding.remove_training_request(event_responding.training_for_role(),
                                                  event_responding.equipment_type)
        makers_server.generate_page('dropped', person_responding.name(), event_responding.title)
    elif response == '':
        makers_server.generate_page('rsvp_choices', person_responding.name(), event_responding.title)
    else:
        makers_server.generate_page('rsvp_error', person_responding.name(), event_responding.title)
Example #6
    def update_controls(self, params):
        config = model.configuration.get_config()
        default_visibilities = config['privacy_defaults']
        self.visibility['host'] = to_bool_or_other(
            params.get('visibility_as_host',
                       default_visibilities['visibility_as_host']))
        self.visibility['attendee'] = to_bool_or_other(
            params.get('visibility_as_attendee',
                       default_visibilities['visibility_as_attendee']))
        self.visibility['general'] = to_bool_or_other(
            params.get('visibility_in_general',
                       default_visibilities['visibility_in_general']))
        stylesheet = os.path.basename(params.get('stylesheet', "makers"))
        if stylesheet in model.configuration.get_stylesheets():
            # use basename so the user can't pick unvetted styles (in case
            # of malicious stuff in them, in case you can do that in css)
            self.stylesheet = stylesheet
        else:
            # use the default if the specified one doesn't exist
            self.stylesheet = configuration.get_config('page', 'stylesheet')
        self.show_help = to_bool_or_other(params.get('display_help', False))
        self.notify_by_email = to_bool_or_other(
            params.get('notify_by_email', False))
        self.notify_in_site = to_bool_or_other(
            params.get('notify_in_site', False))
        self.save()
Example #7
def main():
    """Program to remove person entries completely.
    Originally meant for removing accidental duplicates."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d",
        "--deletions",
        help="""File containing the link_ids to delete, one per line""")
    parser.add_argument("-f",
                        "--for-real",
                        action='store_true',
                        help="""Without this flag, only do a dummy run.""")
    args = parser.parse_args()
    for_real = args.for_real
    config = configuration.get_config()
    db_config = config['database']
    collection_names = db_config['collections']
    database.database_init(config)
    with open(args.deletions) as deletions_file:
        for raw_link_id in deletions_file:
            # Strip the trailing newline so the ID matches the stored link_id.
            del_link_id = raw_link_id.strip()
            if for_real:
                print("Deleting", del_link_id)
                print("Result:", database.delete_by_link_id(del_link_id))
            else:
                print("Would delete", del_link_id)
Example #8
def user_list_section(django_request,
                      include_non_members=False,
                      filter_fn=None,
                      filter_opaque=None):
    """Return the users list, if the viewing person is allowed to see it.
    Otherwise, just how many people there are.
    The optional first argument is a flag for whether to include non-members.
    The optional second argument is a boolean function taking a person object,
    returning whether to include them in the list.  This could be used for things
    like listing people whose fobs are ready for enabling, or who have missed
    paying their latest subscription.  A third argument is passed through
    to that function."""
    global serverconf
    if serverconf is None:
        serverconf = configuration.get_config()['server']
    viewing_user = model.person.Person.find(django_request.user.link_id)
    people = (person.Person.list_all_people()
              if include_non_members else person.Person.list_all_members())
    if filter_fn:
        people = [
            someone for someone in people if filter_fn(someone, filter_opaque)
        ]
    people_dict = {whoever.name(): whoever for whoever in people}
    if viewing_user.is_auditor() or viewing_user.is_admin():
        return T.table[[
            T.tr[T.th(class_='mem_num')["Mem #"],
                 T.th(class_='username')["Name"],
                 T.th(class_='loginh')["Login"],
                 T.th(class_='flagsh')["Flags"],
                 T.th(class_='email')["Email"],
                 T.th(class_='user')["User"],
                 T.th(class_='owner')["Owner"],
                 T.th(class_='trainer')["Trainer"],
                 T.th(class_='note')["Notes"]]
        ], [
            T.tr[T.td(class_='mem_num')[str(who.membership_number)],
                 T.th(class_='username')[T.a(
                     href=django.urls.reverse('dashboard:user_dashboard',
                                              args=[who.link_id]))[whoname]],
                 T.td(class_='login')[who.get_login_name() or ""],
                 T.td(class_='flags')[flagstring(who)],
                 T.td(class_='email')[T.a(
                     href="mailto:" + (who.get_email() or ""))[
                         who.get_email() or ""]],
                 T.td(class_='user'
                      )[equipment_type_role_name_list(who, 'user')],
                 T.td(class_='owner'
                      )[equipment_type_role_name_list(who, 'owner')],
                 T.td(class_='trainer'
                      )[equipment_type_role_name_list(who, 'trainer')],
                 T.td(class_='note')[T.form()[who.get_admin_note() or ""]]]
            for (whoname, who) in [(key, people_dict[key])
                                   for key in sorted(people_dict.keys())]
        ]]
    else:
        return T.p["There are " + str(len(people)) +
                   (" people" if include_non_members else " members") +
                   " in the database."]
Example #9
def _get_proxies(db):
    """ Get a dictionary of proxy information from the app configuration. """

    piscina_url = get_config(db, 'piscina_proxy_url', required=True)

    if piscina_url is None or piscina_url.value.strip() == '':
        raise ScrapeException('No Piscina server configured.')

    return {
        'http': piscina_url.value,
        'https': piscina_url.value,
    }
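
The returned dictionary is shaped for the `proxies` argument of requests; a short sketch of the intended call site (the `db` session is assumed to be open):

proxies = _get_proxies(db)
response = requests.get('https://example.com/profile',
                        proxies=proxies, verify=False, timeout=10)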
Example #10
def _get_proxies(db):
    ''' Get a dictionary of proxy information from the app configuration. '''

    piscina_url = get_config(db, 'piscina_proxy_url', required=True)

    if piscina_url is None or piscina_url.value.strip() == '':
        raise ScrapeException('No Piscina server configured.')

    return {
        'http': piscina_url.value,
        'https': piscina_url.value,
    }
Example #11
def equipment_type_list_section(training_category):
    global serverconf
    global orgconf
    if serverconf is None:
        serverconf = configuration.get_config('server')
    if orgconf is None:
        orgconf = configuration.get_config('organization')
    eqtys = equipment_type.Equipment_type.list_equipment_types(training_category)
    print("training_category is", training_category, "and its types are", eqtys)
    return [T.h2[(T.a(href=orgconf['categories']+training_category.upper())[training_category.capitalize()]
                  or "All") + " equipment types"],
            [T.dl[[[T.dt[T.a(href=serverconf['types']+eqty.name)[eqty.pretty_name()]],
                    T.dd[T.dl[T.dt["Machines"],
                              [T.ul(class_="compactlist")[[T.li[T.a(href=serverconf['machines']+m.name)[m.name]]
                                                                  for m in eqty.get_machines()]]],
                              T.dt["Training requests"],
                              T.dd[
                                  # todo: no training requests are visible (check whether they are even created)
                                  T.ul(class_="compactlist")[[T.li[r.name()] for r in eqty.get_training_requests('user')]]
                              ]]]]
                    for eqty in eqtys]]]]
Example #12
def help_for_topic(
        help_name,
        default_text="<p>Help text not available for topic %(topic)s</p>",
        substitutions=None):
    # Avoid a mutable default argument.
    substitutions = substitutions or {}
    help_file = os.path.join(configuration.get_config('page', 'help_texts'),
                             help_name + ".html")
    if os.path.isfile(help_file):
        with open(help_file) as helpstream:
            return helpstream.read() % substitutions
    # this has to be a dictionary substitution because otherwise
    # default_text must contain a substitution marker:
    return default_text % {'topic': help_name}
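
A worked illustration of the %-substitution behavior above (the topic name and keys are hypothetical):

# If <help_texts>/fobs.html contains '<p>Hello %(user)s</p>':
help_for_topic('fobs', substitutions={'user': 'Alice'})
# -> '<p>Hello Alice</p>'
# Unknown topics fall back to default_text, with %(topic)s filled in:
help_for_topic('no-such-topic')
# -> '<p>Help text not available for topic no-such-topic</p>'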
Example #13
def export0(args):
    verbose = args.verbose
    config = configuration.get_config()
    db_config = config['database']
    collection_names = db_config['collections']
    if verbose:
        print("collection names are", collection_names)
    database.database_init(config)
    if args.all:
        model.backup_to_csv.make_database_backup(tarballfilename=args.all)
    else:
        if args.users:
            export_role('user', args.users)
        if args.owners:
            export_role('owner', args.owners)
        if args.trainers:
            export_role('trainer', args.trainers)
Example #14
def _splash_request(db_session, username, site, request_timeout):
    ''' Ask splash to render a page for us. '''
    target_url = site.get_url(username)
    splash_url = get_config(db_session, 'splash_url', required=True).value
    splash_headers = {
        'User-Agent': USER_AGENT,
    }
    splash_params = {
        'url': target_url,
        'html': 1,
        'jpeg': 1,
        'history': 1,
        'timeout': request_timeout,
        'resource_timeout': 5,
    }
    splash_response = requests.get(
        urljoin(splash_url, 'render.json'),
        headers=splash_headers,
        params=splash_params
    )
    result = {
        'code': splash_response.status_code,
        'error': None,
        'image': None,
        'site': site.as_dict(),
        'url': target_url,
    }

    try:
        splash_response.raise_for_status()
        # Parse after the status check; a non-JSON error page would otherwise
        # raise an unhandled ValueError outside this handler.
        splash_data = splash_response.json()

        if _check_splash_response(site, splash_response, splash_data):
            result['status'] = 'f'
        else:
            result['status'] = 'n'

        result['image'] = splash_data['jpeg']
    except Exception as e:
        result['status'] = 'e'
        result['error'] = str(e)

    return result
Example #15
def invitation_response_form_page(rsvp_uuid):
    """From an invitation UUID that was mailed to someone, produce a response form."""
    global server_conf
    server_conf = configuration.get_config('server')
    page_pieces.set_server_conf()
    person_responding = person.Person.find(database.find_rsvp(rsvp_uuid))
    # set up viewing as though the user has actually logged in
    access_permissions.Access_Permissions.setup_access_permissions(person_responding.link_id)
    # todo: also tell django that they are effectively logged in?
    event_responding = event.Event.find_by_id(person_responding.invitations[rsvp_uuid])
    form_act = django.urls.reverse("events:rsvp_form", args=[rsvp_uuid])
    return T.div(class_="invitresp")[
        T.h1["RSVP for " + person_responding.name(access_permissions_event=event_responding)],
        T.p["This is a " + event_responding.event_type
            + " event starting at " + str(event_responding.start)
            + ".  The event will be hosted by "
            + ". and ".join([obj.name(access_permissions_role='host')
                             for obj in event_responding.hosts
                             if obj is not None])
            + "."],
        T.form(action=form_act,
               method='POST')[
            T.input(type="hidden",
                    name="rsvp_uuid",
                    value=rsvp_uuid),
            T.table[T.tr[T.td[T.input(type='radio',
                                      name='rsvp',
                                      value='accept')],
                         T.td["Accept invitation"]],
                    T.tr[T.td[T.input(type='radio',
                                      name='rsvp',
                                      value='decline')],
                         T.td["Decline invitation"]],
                    T.tr[T.td[T.input(type='radio',
                                      name='rsvp',
                                      value='drop')],
                         T.td["Decline invitation and cancel training request"]]],
            T.input(type="submit", value="Send response")]]
Example #16
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--collection", default='profiles')
    parser.add_argument("-o", "--output", default=None)
    args = parser.parse_args()
    config = configuration.get_config()
    db_config = config['database']
    collection_names = db_config['collections']
    database.database_init(config)
    collection = args.collection
    output_name = args.output or (collection + ".csv")
    rows = database.get_collection_rows(collection)
    keys = []
    for row in rows:
        for key in row.keys():
            if key not in keys:
                keys.append(key)
    print("keys are", keys)
    with open(output_name, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, keys)
        writer.writeheader()
        for row in rows:
            writer.writerow(row)
Example #17
def get_slots_conf():
    """Get the timeslots information from the configuration file.
    It is cached once found, as it shouldn't change during a run.
    The results are a list of days of the week in order, a dictionary
    defining the named time periods in each day as a list of the
    start and end times, and a list of the named periods in order
    of their starting times."""
    global day_order, periods, period_order
    slotconf = configuration.get_config('timeslots')
    if day_order is None:
        day_order = slotconf['days']
    if periods is None:
        # Each period is written as "HH:MM--HH:MM"; parse it into a
        # [start, end] pair of datetime.time values.
        periods = {pname: [datetime.time(*[int(x)
                                           for x in slot.strip().split(':')])
                           for slot in pdescr.split('--')]
                   for pname, pdescr in slotconf['periods'].items()}
    if period_order is None:
        tmp = { startend[0]: name for name, startend in periods.items() }
        period_order = [ tmp[tm] for tm in sorted(tmp.keys()) ]
        if len(period_order) < 4:
            period_order.append('Other')
    return day_order, periods, period_order
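
A worked example, assuming a hypothetical timeslots configuration with days: ['Monday', ...] and periods: {'Morning': '09:00--13:00', 'Evening': '18:00--22:00'}:

day_order, periods, period_order = get_slots_conf()
# day_order    == ['Monday', ...]
# periods      == {'Morning': [datetime.time(9, 0), datetime.time(13, 0)],
#                  'Evening': [datetime.time(18, 0), datetime.time(22, 0)]}
# period_order == ['Morning', 'Evening', 'Other']  # 'Other' pads short lists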
Example #18
    def post(self):
        '''
        Process user payment.

        **Example Request**

        .. sourcecode:: json

            {
                "user_id": 1,
                "stripe_token": "tok_1A9VDuL25MRJTn0APWrFQrN6",
                "credits": 400
                "currency": "usd",
                "description": "200 credits for $20",
            }

        **Example Response**

        .. sourcecode:: json

            {
                "message": "200 credits added."
            }

        :<header Content-Type: application/json
        :<header X-Auth: the client's auth token
        :<json int user_id: the user ID
        :<json str stripe_token: the stripe payment token
        :<json int credits: the purchase credits

        :>header Content-Type: application/json
        :>json string message: API response message

        :status 202: payment accepted and credits added
        :status 400: invalid request body
        :status 401: authentication required
        :status 403: not authorized to make the requested changes
        '''
        # Validate json input
        request_json = request.get_json()
        validate_request_json(request_json, _payment_attrs)

        user = g.db.query(User).filter(
            User.id == request_json['user_id']).first()

        if user is None:
            raise NotFound('User does not exist.')

        if g.user.id != user.id:
            raise Forbidden('You may only purchase credits for '
                            'your own account.')

        # Configure stripe client
        try:
            stripe.api_key = get_config(session=g.db,
                                        key='stripe_secret_key',
                                        required=True).value
        except Exception as e:
            raise ServiceUnavailable(e)

        key = 'credit_cost'
        credit_cost = g.db.query(Configuration) \
                          .filter(Configuration.key == key) \
                          .first()

        if credit_cost is None:
            raise NotFound(
                'There is no configuration item named "{}".'.format(key))

        # Stripe token is created client-side using Stripe.js
        token = request_json['stripe_token']

        # Get payment parameters
        credits = int(request_json['credits'])
        description = request_json['description']
        currency = request_json['currency']
        costs = self._get_costs(credit_cost.value)

        # Look up the price for the requested credit bundle.
        try:
            amount = costs[credits]
        except (KeyError, IndexError):
            raise BadRequest('Invalid credit amount.')

        try:
            # Charge the user's card:
            charge = stripe.Charge.create(amount=amount,
                                          currency=currency,
                                          description=description,
                                          source=token)
        except stripe.error.CardError as e:
            # Since it's a decline, stripe.error.CardError will be caught
            body = e.json_body
            err = body['error']
            raise BadRequest('Card error: {}'.format(err['message']))
        except stripe.error.RateLimitError as e:
            # Too many requests made to the API too quickly
            body = e.json_body
            err = body['error']
            raise BadRequest('Rate limit error: {}'.format(err['message']))
        except stripe.error.InvalidRequestError as e:
            # Invalid parameters were supplied to Stripe's API
            body = e.json_body
            err = body['error']
            raise BadRequest('Invalid parameters: {}'.format(err['message']))
        except stripe.error.AuthenticationError as e:
            # Authentication with Stripe's API failed
            # (maybe API keys changed recently)
            body = e.json_body
            err = body['error']
            raise ServiceUnavailable('Stripe authentication error: {}'.format(
                err['message']))
        except stripe.error.APIConnectionError as e:
            # Network communication with Stripe failed
            body = e.json_body
            err = body['error']
            raise ServiceUnavailable(
                'Stripe API communication failed: {}'.format(err['message']))
        except stripe.error.StripeError as e:
            # Generic error
            body = e.json_body
            err = body['error']
            raise ServiceUnavailable('Stripe error: {}'.format(err['message']))
        except Exception as e:
            # Something else happened, completely unrelated to Stripe
            raise ServiceUnavailable('Error: {}'.format(e))

        user.credits += credits
        g.db.commit()
        g.redis.publish('user', json.dumps(user.as_dict()))

        message = '{} credits added.'.format(credits)
        response = jsonify(message=message)
        response.status_code = 202

        return response
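
The handler depends on self._get_costs, which is not shown; the costs[credits] lookup suggests it returns a mapping from credit bundle to price. A hypothetical shape, for illustration only:

    def _get_costs(self, credit_cost):
        # Hypothetical: price each purchasable bundle in the smallest
        # currency unit, from the per-credit price in the configuration.
        return {credits: credits * int(credit_cost)
                for credits in (200, 400, 1000)}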
Example #19
def scrape_twitter_posts(id_, recent):
    '''
    Fetch tweets for the user identified by id_.
    Checks tweets already stored in db, and will only fetch older or newer
    tweets depending on value of the boolean argument 'recent',
    e.g. recent=True will return recent tweets not already stored in the db.
    The number of tweets to fetch is configured in the Admin.
    '''
    db = worker.get_session()
    max_results = get_config(db, 'max_posts_twitter', required=True).value

    try:
        max_results = int(max_results)
    except (TypeError, ValueError):
        raise ScrapeException('Value of max_posts_twitter must be an integer')

    worker.start_job(total=max_results)
    redis = worker.get_redis()
    author = db.query(Profile).filter(Profile.id==id_).first()
    proxies = _get_proxies(db)
    results = 0
    max_id = None
    more_results = True
    count = 200

    if author is None:
        raise ValueError('No profile exists with id={}'.format(id_))

    # Get posts currently stored in db for this profile.
    post_query = db.query(Post) \
                        .filter(Post.author_id == id_) \
                        .order_by(Post.upstream_created.desc())

    url = 'https://api.twitter.com/1.1/statuses/user_timeline.json'
    params = {'count': count, 'user_id': author.upstream_id}

    if post_query.count() > 0:
        # Only fetch posts newer than those already stored in db
        if recent:
            since_id = post_query[0].upstream_id
            params['since_id'] = str(since_id)
        # Only fetch posts older than those already stored in db
        else:
            max_id = post_query[post_query.count() -1].upstream_id
            params['max_id'] = str(max_id)

    # Collect new post IDs across all pages so they can be indexed at the end.
    post_ids = []

    while more_results:
        response = requests.get(
            url,
            params=params,
            proxies=proxies,
            verify=False
        )
        response.raise_for_status()

        tweets = response.json()

        # A short or empty page means this is the last page of results.
        if len(tweets) < count:
            more_results = False

        for tweet in tweets:
            # Twitter API result set includes the tweet with the max_id/since_id
            # so ignore it.
            if tweet['id_str'] != max_id:
                post = Post(
                    author,
                    tweet['id_str'],
                    dateutil.parser.parse(tweet['created_at']),
                    tweet['text']
                )

                if tweet['lang'] is not None:
                    post.language = tweet['lang']

                if tweet['coordinates'] is not None:
                    # GeoJSON points are ordered [longitude, latitude].
                    post.longitude, post.latitude = \
                        tweet['coordinates']['coordinates']

                place = tweet['place']

                if place is not None:
                    # Set longitude/latitude to the center the of bounding polygon.
                    total_lon = 0
                    total_lat = 0
                    num_coords = 0

                    for lon, lat in place['bounding_box']['coordinates'][0]:
                        total_lon += lon
                        total_lat += lat
                        num_coords += 1

                    post.longitude = total_lon / num_coords
                    post.latitude = total_lat / num_coords

                    # Set location to string identifying the place.
                    post.location = '{}, {}'.format(
                        place['full_name'],
                        place['country']
                    )

                db.add(post)
                db.flush()
                post_ids.append(post.id)
                # Set the max_id to the last tweet to get the next set of
                # results
                max_id = tweet['id_str']
                params['max_id'] = max_id
                results += 1
                worker.update_job(current=results)

                if results == max_results:
                    more_results = False
                    break


    db.commit()
    worker.finish_job()
    redis.publish('profile_posts', json.dumps({'id': id_}))
    app.queue.schedule_index_posts(post_ids)
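
The since_id/max_id bookkeeping above follows Twitter's timeline paging model; reduced to a sketch (variable values illustrative):

params = {'count': 200, 'user_id': '12345'}
if recent:
    params['since_id'] = newest_stored_id   # walk forward from what is stored
else:
    params['max_id'] = oldest_stored_id     # walk backward past what is stored
# After each page, params['max_id'] is re-pointed at the last tweet seen,
# so the next request resumes further back in the timeline.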
Example #20
def scrape_twitter_relations(id_):
    '''
    Fetch friends and followers for the Twitter user identified by `id_`.
    The number of friends and followers to fetch is configured in Admin.
    '''
    redis = worker.get_redis()
    db = worker.get_session()
    profile = db.query(Profile).filter(Profile.id==id_).first()
    proxies = _get_proxies(db)
    max_results = get_config(db, 'max_relations_twitter', required=True).value

    try:
        max_results = int(max_results)
    except (TypeError, ValueError):
        raise ScrapeException(
            'Value of max_relations_twitter must be an integer'
        )

    friends_results = 0
    friends_ids = []
    followers_results = 0
    followers_ids = []
    friends_cursor = -1
    followers_cursor = -1

    if profile is None:
        raise ValueError('No profile exists with id={}'.format(id_))

    params = {
        'count': 5000,
        'user_id': profile.upstream_id,
        'stringify_ids': True,
    }

    # Get friends currently stored in db for this profile.
    friends_query = (
        db.query(Profile.upstream_id)
        .join(profile_join_self,
              profile_join_self.c.friend_id == Profile.id)
        .filter(profile_join_self.c.follower_id == id_)
    )
    current_friends_ids = [friend.upstream_id for friend in friends_query]

    # Get followers currently stored in db for this profile.
    followers_query = (
        db.query(Profile.upstream_id)
        .join(profile_join_self,
              profile_join_self.c.follower_id == Profile.id)
        .filter(profile_join_self.c.friend_id == id_)
    )
    current_followers_ids = [follower.upstream_id
                             for follower in followers_query]

    # Get friend IDs.
    friends_url = 'https://api.twitter.com/1.1/friends/ids.json'
    params['cursor'] = friends_cursor

    while friends_results < max_results:
        friends_response = requests.get(
            friends_url,
            params=params,
            proxies=proxies,
            verify=False
        )
        friends_response.raise_for_status()

        # Ignore friends already in the db
        for friend_id in friends_response.json()['ids']:
            if friend_id not in current_friends_ids:
                friends_ids.append(friend_id)
                friends_results += 1
                if friends_results == max_results:
                    break

        friends_cursor = friends_response.json()['next_cursor']

        if friends_cursor == 0:
            break # No more results
        else:
            params['cursor'] = friends_cursor

    # Get follower IDs.
    followers_url = 'https://api.twitter.com/1.1/followers/ids.json'
    params['cursor'] = followers_cursor

    while followers_results < max_results:
        followers_response = requests.get(
            followers_url,
            params=params,
            proxies=proxies,
            verify=False
        )
        followers_response.raise_for_status()

        # Ignore followers already in the db
        for follower_id in followers_response.json()['ids']:
            if follower_id not in current_followers_ids:
                followers_ids.append(follower_id)
                followers_results += 1
                if followers_results == max_results:
                    break

        followers_cursor = followers_response.json()['next_cursor']

        if followers_cursor == 0:
            break # No more results
        else:
            params['cursor'] = followers_cursor

    # Get username for each of the friend/follower IDs and create
    # a relationship in QuickPin.
    user_ids = [(uid, 'friend') for uid in friends_ids] + \
               [(uid, 'follower') for uid in followers_ids]
    worker.start_job(total=len(user_ids))
    chunk_size = 100
    for chunk_start in range(0, len(user_ids), chunk_size):
        chunk_end = chunk_start + chunk_size
        chunk = user_ids[chunk_start:chunk_end]
        # Use a name other than id_ to avoid shadowing the function argument.
        chunk_lookup = {uid: relation for uid, relation in chunk}

        lookup_url = 'https://api.twitter.com/1.1/users/lookup.json'
        lookup_response = requests.post(
            lookup_url,
            proxies=_get_proxies(db),
            verify=False,
            data={'user_id': ','.join(chunk_lookup.keys())}
        )
        lookup_response.raise_for_status()
        relations = lookup_response.json()

        for related_dict in relations:
            uid = related_dict['id_str']
            username = related_dict['screen_name']
            related_profile = Profile('twitter', uid, username, is_stub=True)
            db.add(related_profile)

            try:
                db.commit()
            except IntegrityError:
                # Already exists: use the existing profile.
                db.rollback()
                related_profile = db \
                    .query(Profile) \
                    .filter(Profile.site=='twitter') \
                    .filter(Profile.upstream_id==uid) \
                    .one()

            _twitter_populate_profile(related_dict, related_profile)
            relation = chunk_lookup[uid]

            if relation == 'friend':
                profile.friends.append(related_profile)
            else: # relation == 'follower':
                profile.followers.append(related_profile)

            db.commit()

        worker.update_job(current=chunk_end)

    db.commit()
    worker.finish_job()
    redis.publish('profile_relations', json.dumps({'id': id_}))
Example #21
def scrape_instagram_posts(id_, recent):
    """
    Fetch instagram posts for the user identified by id_.
    Checks posts already stored in db, and will only fetch older or newer
    posts depending on value of the boolean argument 'recent',
    e.g. recent=True will return recent posts not already stored in the db.
    The number of posts to fetch is configured in the Admin.
    """
    redis = worker.get_redis()
    db = worker.get_session()
    author = db.query(Profile).filter(Profile.id == id_).first()
    proxies = _get_proxies(db)
    max_results = get_config(db, 'max_posts_instagram', required=True).value
    try:
        max_results = int(max_results)
    except (TypeError, ValueError):
        raise ScrapeException('Value of max_posts_instagram must be an integer')

    min_id = None
    results = 0
    params = {}

    if author is None:
        raise ValueError('No profile exists with id={}'.format(id_))

    url = 'https://api.instagram.com/v1/users/{}/media/recent' \
          .format(author.upstream_id)

    # Get last post currently stored in db for this profile.
    post_query = db.query(Post) \
        .filter(Post.author_id == id_) \
        .order_by(Post.upstream_created.desc())

    if post_query.count() > 0:
        # Only fetch posts newer than those already stored in db
        if recent:
            min_id = post_query[0].upstream_id
            params['min_id'] = str(min_id)
        # Only fetch posts older than those already stored in db
        else:
            max_id = post_query[post_query.count() - 1].upstream_id
            params['max_id'] = str(max_id)

    worker.start_job(total=max_results)
    # Collect new post IDs across all pages so they can be indexed at the end.
    post_ids = []

    while results < max_results:
        response = requests.get(
            url,
            params=params,
            proxies=proxies,
            verify=False
        )

        response.raise_for_status()
        response_json = response.json()['data']
        pagination = response.json()['pagination']

        # Instagram API result includes post with min_id so remove it
        response_json[:] = [d for d in response_json if d.get('id') != min_id]

        for gram in response_json:
            if gram['caption'] is not None:
                text = gram['caption']['text']
            else:
                text = None

            post = Post(
                author,
                gram['id'],
                datetime.fromtimestamp(int(gram['created_time'])),
                text
            )

            if gram['location'] is not None:
                if 'latitude' in gram['location']:
                    post.latitude = gram['location']['latitude']
                    post.longitude = gram['location']['longitude']

                if 'name' in gram['location']:
                    post.location = gram['location']['name']

                    if 'street_address' in gram['location']:
                        post.location += ' ' + gram['location']['street_address']

            if 'images' in gram:
                image_url = gram['images']['standard_resolution']['url']
                name = os.path.basename(urlparse(image_url).path)
                img_response = requests.get(image_url, verify=False)
                mime = img_response.headers['Content-type']
                image = img_response.content
                post.attachments.append(File(name, mime, image))

            db.add(post)
            db.flush()
            post_ids.append(post.id)
            worker.update_job(current=results)
            results += 1
            if results == max_results:
                break

        # If there are more results, set the max_id param, otherwise finish
        if 'next_max_id' in pagination:
            params['max_id'] = pagination['next_max_id']
        else:
            break

    db.commit()
    worker.finish_job()
    redis.publish('profile_posts', json.dumps({'id': id_}))
    app.queue.schedule_index_posts(post_ids)
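
The loop above relies on Instagram's next_max_id pagination; its skeleton, with the post-building elided (illustrative):

params = {}
while results < max_results:
    body = requests.get(url, params=params, proxies=proxies,
                        verify=False).json()
    # ... build Post rows from body['data'] ...
    if 'next_max_id' in body['pagination']:
        params['max_id'] = body['pagination']['next_max_id']
    else:
        break  # no more pages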
Example #22
def scrape_twitter_relations(id_):
    """
    Fetch friends and followers for the Twitter user identified by `id_`.
    The number of friends and followers to fetch is configured in Admin.
    """
    redis = worker.get_redis()
    db = worker.get_session()
    profile = db.query(Profile).filter(Profile.id==id_).first()
    proxies = _get_proxies(db)
    max_results = get_config(db, 'max_relations_twitter', required=True).value

    try:
        max_results = int(max_results)
    except (TypeError, ValueError):
        raise ScrapeException(
            'Value of max_relations_twitter must be an integer'
        )

    friends_results = 0
    friends_ids = []
    followers_results = 0
    followers_ids = []
    friends_cursor = -1
    followers_cursor = -1

    if profile is None:
        raise ValueError('No profile exists with id={}'.format(id_))

    params = {
        'count': 5000,
        'user_id': profile.upstream_id,
        'stringify_ids': True,
    }

    # Get friends currently stored in db for this profile.
    friends_query = (
        db.query(Profile.upstream_id)
        .join(profile_join_self,
              profile_join_self.c.friend_id == Profile.id)
        .filter(profile_join_self.c.follower_id == id_)
    )
    current_friends_ids = [friend.upstream_id for friend in friends_query]

    # Get followers currently stored in db for this profile.
    followers_query = (
        db.query(Profile.upstream_id)
        .join(profile_join_self,
              profile_join_self.c.follower_id == Profile.id)
        .filter(profile_join_self.c.friend_id == id_)
    )
    current_followers_ids = [follower.upstream_id
                             for follower in followers_query]

    # Get friend IDs.
    friends_url = 'https://api.twitter.com/1.1/friends/ids.json'
    params['cursor'] = friends_cursor

    while friends_results < max_results:
        friends_response = requests.get(
            friends_url,
            params=params,
            proxies=proxies,
            verify=False,
            headers=TWITTER_HEADERS
        )
        friends_response.raise_for_status()

        # Ignore friends already in the db
        for friend_id in friends_response.json()['ids']:
            if friend_id not in current_friends_ids:
                friends_ids.append(friend_id)
                friends_results += 1
                if friends_results == max_results:
                    break

        friends_cursor = friends_response.json()['next_cursor']

        if friends_cursor == 0:
            break # No more results
        else:
            params['cursor'] = friends_cursor

    # Get follower IDs.
    followers_url = 'https://api.twitter.com/1.1/followers/ids.json'
    params['cursor'] = followers_cursor

    while followers_results < max_results:
        followers_response = requests.get(
            followers_url,
            params=params,
            proxies=proxies,
            verify=False,
            headers=TWITTER_HEADERS,
        )
        followers_response.raise_for_status()

        # Ignore followers already in the db
        for follower_id in followers_response.json()['ids']:
            if follower_id not in current_followers_ids:
                followers_ids.append(follower_id)
                followers_results += 1
                if followers_results == max_results:
                    break

        followers_cursor = followers_response.json()['next_cursor']

        if followers_cursor == 0:
            break # No more results
        else:
            params['cursor'] = followers_cursor

    # Get username for each of the friend/follower IDs and create
    # a relationship in QuickPin.
    user_ids = [(uid, 'friend') for uid in friends_ids] + \
               [(uid, 'follower') for uid in followers_ids]
    worker.start_job(total=len(user_ids))
    chunk_size = 100
    for chunk_start in range(0, len(user_ids), chunk_size):
        chunk_end = chunk_start + chunk_size
        chunk = user_ids[chunk_start:chunk_end]
        # Use a name other than id_ to avoid shadowing the function argument.
        chunk_lookup = {uid: relation for uid, relation in chunk}

        lookup_url = 'https://api.twitter.com/1.1/users/lookup.json'
        lookup_response = requests.post(
            lookup_url,
            proxies=_get_proxies(db),
            verify=False,
            headers=TWITTER_HEADERS,
            data={'user_id': ','.join(chunk_lookup.keys())}
        )
        lookup_response.raise_for_status()
        relations = lookup_response.json()

        for related_dict in relations:
            uid = related_dict['id_str']
            username = related_dict['screen_name']
            related_profile = Profile('twitter', uid, username, is_stub=True)
            db.add(related_profile)

            try:
                db.commit()
            except IntegrityError:
                # Already exists: use the existing profile.
                db.rollback()
                related_profile = db \
                    .query(Profile) \
                    .filter(Profile.site=='twitter') \
                    .filter(Profile.upstream_id==uid) \
                    .one()

            _twitter_populate_profile(related_dict, related_profile)
            relation = chunk_lookup[uid]

            if relation == 'friend':
                profile.friends.append(related_profile)
            else: # relation == 'follower':
                profile.followers.append(related_profile)

            db.commit()

        worker.update_job(current=chunk_end)

    db.commit()
    worker.finish_job()
    redis.publish('profile_relations', json.dumps({'id': id_}))
Example #23
def page_string(page_title,
                content,
                user=None,
                initial_tab=None,
                needs_jquery=False):
    """Make up a complete page as a string."""
    conf = configuration.get_config()
    page_conf = conf['page']
    org_conf = conf['organization']
    preamble = page_conf.get('preamble', '')
    script_file = page_conf['script_file']
    script_body = ""
    if os.path.exists(script_file):
        with open(script_file) as mfile:
            script_body = mfile.read()
    script_text = """<script type="text/javascript">""" + script_body + """</script>\n"""
    if needs_jquery:
        script_text += """<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>\n"""
    motd = ""
    motd_file = page_conf['motd_file']
    if os.path.exists(motd_file):
        with open(motd_file) as mfile:
            motd = mfile.read()
    stylesheet_name = page_conf['stylesheet']
    if user and user.stylesheet:
        user_stylesheet_name = os.path.join(os.path.dirname(stylesheet_name),
                                            user.stylesheet + ".css")
        if os.path.exists(user_stylesheet_name):
            stylesheet_name = user_stylesheet_name
    style_text = ""
    if os.path.exists(stylesheet_name):
        inline = page_conf['style_inline']
        if inline:
            with open(stylesheet_name) as sf:
                style_text = '<style type="text/css">' + sf.read() + '</style>'
        else:
            style_text = '<link rel="stylesheet" type="text/css" href="' + stylesheet_name + '">'
    # todo: put the motd into the preamble
    postamble = page_conf.get('postamble', '')
    final_setup = """<script type="text/javascript">selectTab('""" + initial_tab + """')</script>""" if initial_tab else ""
    page_heading = page_title
    logo = page_conf.get('heading_logo', None)
    if logo:
        logo_height = int(page_conf.get('logo_height', "32"))
        page_heading = T.span[
            page_heading,
            T.a(href=org_conf['home_page'])[T.img(align="right",
                                                  alt=org_conf['title'],
                                                  height=logo_height,
                                                  src=logo)]]
    footer = T.footer[
        T.hr,
        T.p(class_="the_small_print")
        ["Produced by the ",
         T.a(href="https://github.com/hillwithsmallfields/makers/")["makers"],
         " system.  ", "We use ",
         T.a(href="https://www.djangoproject.com/")["django"],
         " to handle login and sessions, and that uses a ",
         T.a(href="https://docs.djangoproject.com/en/2.1/topics/http/"
             "sessions/#using-cookie-based-sessions")["session cookie"],
         " and a ",
         T.a(href="https://docs.djangoproject.com/en/2.1/ref/csrf/"
             )["CSRF protection cookie"], ".  ",
         "We don't use any other cookies that we are aware of, and we neither sell your data nor give it away."]]
    return RawHtmlPage(
        page_title,
        untemplate.HTML5Doc([
            untemplate.safe_unicode(style_text + script_text + preamble),
            T.body[T.h1[page_heading], content, footer],
            untemplate.safe_unicode(postamble),
            untemplate.safe_unicode(final_setup)
        ],
                            head=T.head[T.title[page_title]])).to_string()
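
A hedged call sketch for the function above; current_user and the tab name are hypothetical:

html = page_string('Dashboard',
                   T.div[T.p["Welcome back."]],
                   user=current_user,   # a Person-like object with .stylesheet
                   initial_tab='events',
                   needs_jquery=True)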
Example #24
def scrape_instagram_relations(id_):
    """
    Fetch friends and followers for the Instagram user identified by `id_`.
    The number of friends and followers to fetch is configured in Admin.
    """
    redis = worker.get_redis()
    db = worker.get_session()
    profile = db.query(Profile).filter(Profile.id==id_).first()
    proxies = _get_proxies(db)
    friends_results = 0
    followers_results = 0
    max_results = get_config(db, 'max_relations_instagram', required=True).value

    try:
        max_results = int(max_results)
    except (TypeError, ValueError):
        raise ScrapeException(
            'Value of max_relations_instagram must be an integer'
        )

    friends_params = {}
    followers_params = {}
    total_results = max_results*2

    if profile is None:
        raise ValueError('No profile exists with id={}'.format(id_))

    # Get friends currently stored in db for this profile.
    friends_query = (
        db.query(Profile.upstream_id)
        .join(profile_join_self,
              profile_join_self.c.friend_id == Profile.id)
        .filter(profile_join_self.c.follower_id == id_)
    )
    current_friends_ids = [friend.upstream_id for friend in friends_query]

    # Get followers currently stored in db for this profile.
    followers_query = (
        db.query(Profile.upstream_id)
        .join(profile_join_self,
              profile_join_self.c.follower_id == Profile.id)
        .filter(profile_join_self.c.friend_id == id_)
    )
    current_followers_ids = [follower.upstream_id
                             for follower in followers_query]

    worker.start_job(total=total_results)

    # Get friend IDs.
    friends_url = 'https://api.instagram.com/v1/users/{}/follows' \
                  .format(profile.upstream_id)

    while friends_results < max_results:
        # Get friends from Instagram API
        friends_response = requests.get(
            friends_url,
            params=friends_params,
            proxies=proxies,
            verify=False
        )
        friends_response.raise_for_status()
        pagination = friends_response.json()['pagination']

        for friend in friends_response.json()['data']:
            # Only store friends that are not already in db.
            if friend['id'] not in current_friends_ids:
                related_profile = Profile(
                    'instagram',
                    friend['id'],
                    friend['username'],
                    is_stub=True
                )

                db.add(related_profile)

                try:
                    db.commit()
                except IntegrityError:
                    db.rollback()
                    related_profile = db \
                            .query(Profile) \
                            .filter(Profile.site=='instagram') \
                            .filter(Profile.upstream_id==friend['id']) \
                            .one()

                related_profile.name = friend['full_name']
                profile.friends.append(related_profile)
                friends_results += 1
                worker.update_job(current=friends_results)

                if friends_results == max_results:
                    break

        # If there are more results, set the cursor parameter, otherwise finish
        if 'next_cursor' in pagination:
            friends_params['cursor'] = pagination['next_cursor']
        else:
            break # No more results

    # Get follower IDs.
    followers_url = 'https://api.instagram.com/v1/users/{}/followed-by' \
                    .format(profile.upstream_id)

    # Get followers from Instagram API
    while followers_results < max_results:
        # Get friends from Instagram API
        followers_response = requests.get(
            followers_url,
            params=followers_params,
            proxies=proxies,
            verify=False
        )
        followers_response.raise_for_status()
        pagination = followers_response.json()['pagination']

        for follower in followers_response.json()['data']:
            # Only store followers that are not already in db.
            if follower['id'] not in current_followers_ids:
                related_profile = Profile(
                    'instagram',
                    follower['id'],
                    follower['username'],
                    is_stub=True
                )

                db.add(related_profile)

                try:
                    db.commit()
                except IntegrityError:
                    db.rollback()
                    related_profile = db \
                            .query(Profile) \
                            .filter(Profile.site=='instagram') \
                            .filter(Profile.upstream_id==follower['id']) \
                            .one()

                related_profile.name = follower['full_name']
                profile.followers.append(related_profile)
                followers_results += 1
                worker.update_job(current=friends_results + followers_results)

                if followers_results == max_results:
                    break

        # If there are more results, set the cursor parameter, otherwise finish
        if 'next_cursor' in pagination:
            followers_params['cursor'] = pagination['next_cursor']
        else:
            break # No more results

    worker.finish_job()
    redis.publish('profile_relations', json.dumps({'id': id_}))
Example #25
def set_access_permissions_as_admin(access_permissions):
    access_permissions.add_role(
        'owner',
        configuration.get_config()['organization']['database'])
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-y",
                        "--equipment-types",
                        default="equipment-types.csv")
    parser.add_argument("-e", "--equipment", default="equipment.csv")
    parser.add_argument("-m", "--members", default="members.csv")
    parser.add_argument("-u", "--users", default="users.csv")
    parser.add_argument("-o", "--owners", default="owners.csv")
    parser.add_argument("-t", "--trainers", default="trainers.csv")
    parser.add_argument("-b", "--templates", default="event_templates")
    parser.add_argument("--delete-existing", action='store_true')
    parser.add_argument("-v", "--verbose", action='store_true')
    parser.add_argument("-q", "--quick", action='store_true')
    parser.add_argument("-x", "--existing", "--no-import", action='store_true')
    args = parser.parse_args()

    start_time = time.time()

    config = configuration.get_config()

    access_permissions.Access_Permissions.change_access_permissions(
        set_access_permissions_as_admin)

    days, slots, order = timeslots.get_slots_conf()
    print("periods are", slots, "in order", order)

    if not args.existing:
        print("importing from spreadsheet files")
        importer.import0(args)
    else:
        database.database_init(config, args.delete_existing)

    stage_time = time.time()

    print("import complete, running random user behaviour at",
          int(stage_time - start_time), "seconds")
    all_types = equipment_type.Equipment_type.list_equipment_types()
    green_equipment = equipment_type.Equipment_type.list_equipment_types(
        'green')
    green_templates = [
        make_training_event_template(eqty) for eqty in green_equipment
    ]
    print("green templates are", green_templates)

    if not args.existing:
        random_user_activities(all_types, green_templates, args.verbose)

    this_time = time.time()
    print("Completed main random behaviour in", int(this_time - stage_time),
          "seconds")
    stage_time = this_time

    # make sure there are some events going on right now
    # TODO: find out why creating these events sometimes fails, fix that,
    # then check whether the "current event" code is working
    everybody = person.Person.list_all_people()
    n_current = random.randrange(3, 7)
    print("Creating", n_current, "current events")
    for _ in range(n_current):
        # Start on the current whole hour, or up to one hour earlier;
        # subtracting a timedelta avoids an invalid negative hour shortly
        # after midnight.
        event_datetime = datetime.now().replace(
            minute=0, second=0,
            microsecond=0) - timedelta(hours=random.randrange(0, 2))
        print("Making current event starting at", event_datetime)
        setup_random_event(green_templates,
                           event_datetime,
                           [random.choice(green_equipment)._id],
                           [random.choice(everybody)._id],
                           verbose=True)
        print("There are now", len(timeline.Timeline.present_events().events),
              "present events")

    # make sure there are some future events
    # TODO: find out why creating these events sometimes fails, fix that,
    # then check whether the "future event" code is working
    n_future = random.randrange(24, 48)
    print("Creating", n_future, "future events")
    for _ in range(n_future):
        event_datetime = datetime.now().replace(
            hour=19, minute=0, second=0,
            microsecond=0) + timedelta(days=random.randrange(1, 21))
        print("Making future event starting at", event_datetime)
        setup_random_event(green_templates,
                           event_datetime,
                           [random.choice(green_equipment)._id],
                           [random.choice(everybody)._id],
                           verbose=True)
        print("There are now", len(timeline.Timeline.future_events().events),
              "future events and", len(timeline.Timeline.past_events().events),
              "past events")

    print("present events are", timeline.Timeline.present_events().events)
    print("future events are", timeline.Timeline.future_events().events)

    if not args.quick:
        print("listing members")
        all_members = person.Person.list_all_members()
        for whoever in all_members:
            show_person("member-pages", whoever)

    show_equipment_types()
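Both event-seeding loops in main() build their start times the same way; pulled out as standalone helpers, the computations look like this (a sketch only; the helper names random_future_evening and recent_whole_hour are introduced here and are not part of the script):

import random
from datetime import datetime, timedelta


def random_future_evening():
    # 19:00 on a day between 1 and 20 days from now, mirroring the
    # future-event loop in main() above.
    base = datetime.now().replace(hour=19, minute=0, second=0, microsecond=0)
    return base + timedelta(days=random.randrange(1, 21))


def recent_whole_hour():
    # The current whole hour, or up to one hour earlier, mirroring the
    # current-event loop in main() above.
    base = datetime.now().replace(minute=0, second=0, microsecond=0)
    return base - timedelta(hours=random.randrange(0, 2))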
Beispiel #27
0
def scrape_instagram_relations(id_):
    '''
    Fetch friends and followers for the Instagram user identified by `id_`.
    The number of friends and followers to fetch is configured in Admin.
    '''
    redis = worker.get_redis()
    db = worker.get_session()
    profile = db.query(Profile).filter(Profile.id==id_).first()
    proxies = _get_proxies(db)
    friends_results = 0
    followers_results = 0
    max_results = get_config(db, 'max_relations_instagram',
                             required=True).value

    try:
        max_results = int(max_results)
    except (TypeError, ValueError):
        raise ScrapeException(
            'Value of max_relations_instagram must be an integer'
        )

    friends_params = {}
    followers_params = {}
    total_results = max_results * 2

    if profile is None:
        raise ValueError('No profile exists with id={}'.format(id_))

    # Get friends currently stored in db for this profile.
    friends_query = db.query(Profile.upstream_id) \
        .join(profile_join_self,
              profile_join_self.c.friend_id == Profile.id) \
        .filter(profile_join_self.c.follower_id == id_)
    current_friends_ids = [friend.upstream_id for friend in friends_query]

    # Get followers currently stored in db for this profile.
    followers_query = db.query(Profile.upstream_id) \
        .join(profile_join_self,
              profile_join_self.c.follower_id == Profile.id) \
        .filter(profile_join_self.c.friend_id == id_)
    current_followers_ids = [
        follower.upstream_id for follower in followers_query
    ]

    worker.start_job(total=total_results)

    # Get friend IDs.
    friends_url = 'https://api.instagram.com/v1/users/{}/follows' \
                  .format(profile.upstream_id)

    while friends_results < max_results:
        # Get friends from Instagram API
        friends_response = requests.get(
            friends_url,
            params=friends_params,
            proxies=proxies,
            verify=False
        )
        friends_response.raise_for_status()
        pagination = friends_response.json()['pagination']

        for friend in friends_response.json()['data']:
            # Only store friends that are not already in db.
            if friend['id'] not in current_friends_ids:
                related_profile = Profile(
                    'instagram',
                    friend['id'],
                    friend['username'],
                    is_stub=True
                )

                db.add(related_profile)

                try:
                    db.commit()
                except IntegrityError:
                    db.rollback()
                    related_profile = db \
                            .query(Profile) \
                            .filter(Profile.site=='instagram') \
                            .filter(Profile.upstream_id==friend['id']) \
                            .one()

                related_profile.name = friend['full_name']
                profile.friends.append(related_profile)
                friends_results += 1
                worker.update_job(current=friends_results)

                if friends_results == max_results:
                    break

        # If there are more results, set the cursor parameter, otherwise finish
        if 'next_cursor' in pagination:
            friends_params['cursor'] = pagination['next_cursor']
        else:
            break # No more results

    # Get follower IDs.
    followers_url = 'https://api.instagram.com/v1/users/{}/followed-by' \
                    .format(profile.upstream_id)

    # Get followers from Instagram API
    while followers_results < max_results:
        followers_response = requests.get(
            followers_url,
            params=followers_params,
            proxies=proxies,
            verify=False
        )
        followers_response.raise_for_status()
        pagination = followers_response.json()['pagination']

        for follower in followers_response.json()['data']:
            # Only store followers that are not already in db.
            if follower['id'] not in current_followers_ids:
                related_profile = Profile(
                    'instagram',
                    follower['id'],
                    follower['username'],
                    is_stub=True
                )

                db.add(related_profile)

                try:
                    db.commit()
                except IntegrityError:
                    db.rollback()
                    related_profile = db \
                            .query(Profile) \
                            .filter(Profile.site=='instagram') \
                            .filter(Profile.upstream_id==follower['id']) \
                            .one()

                related_profile.name = follower['full_name']
                profile.followers.append(related_profile)
                followers_results += 1
                worker.update_job(current=friends_results + followers_results)

                if followers_results == max_results:
                    break

        # If there are more results, set the cursor parameter, otherwise finish
        if 'next_cursor' in pagination:
            followers_params['cursor'] = pagination['next_cursor']
        else:
            break # No more results

    worker.finish_job()
    redis.publish('profile_relations', json.dumps({'id': id_}))
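The friends and followers loops above share one shape: request a page, store the new profiles, then follow pagination.next_cursor until it disappears. A minimal sketch of that cursor-pagination pattern (paginate_instagram is a hypothetical helper, not part of the module; auth and proxy handling are omitted):

import requests


def paginate_instagram(url, params, max_items):
    # Yield items page by page until the API stops returning a next_cursor
    # or max_items items have been produced.
    fetched = 0
    while fetched < max_items:
        response = requests.get(url, params=params)
        response.raise_for_status()
        body = response.json()
        for item in body['data']:
            yield item
            fetched += 1
            if fetched == max_items:
                return
        pagination = body.get('pagination', {})
        if 'next_cursor' not in pagination:
            return  # no more pages
        params['cursor'] = pagination['next_cursor']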
Beispiel #28
0
def scrape_twitter_posts(id_, recent):
    """
    Fetch tweets for the user identified by id_.
    Checks tweets already stored in db, and will only fetch older or newer
    tweets depending on value of the boolean argument 'recent',
    e.g. recent=True will return recent tweets not already stored in the db.
    The number of tweets to fetch is configured in the Admin.
    """
    db = worker.get_session()
    max_results = get_config(db, 'max_posts_twitter', required=True).value

    try:
        max_results = int(max_results)
    except (TypeError, ValueError):
        raise ScrapeException('Value of max_posts_twitter must be an integer')

    worker.start_job(total=max_results)
    redis = worker.get_redis()
    author = db.query(Profile).filter(Profile.id==id_).first()
    proxies = _get_proxies(db)
    results = 0
    max_id = None
    more_results = True
    count = 200

    if author is None:
        raise ValueError('No profile exists with id={}'.format(id_))

    # Get posts currently stored in db for this profile.
    post_query = db.query(Post) \
        .filter(Post.author_id == id_) \
        .order_by(Post.upstream_created.desc())

    url = 'https://api.twitter.com/1.1/statuses/user_timeline.json'
    params = {'count': count, 'user_id': author.upstream_id}

    if post_query.count() > 0:
        # Only fetch posts newer than those already stored in db
        if recent:
            since_id = post_query[0].upstream_id
            params['since_id'] = str(since_id)
        # Only fetch posts older than those already stored in db
        else:
            max_id = post_query[post_query.count() - 1].upstream_id
            params['max_id'] = str(max_id)

    # Collect new post IDs across all pages so they can be indexed at the end.
    post_ids = []

    while more_results:
        response = requests.get(
            url,
            params=params,
            proxies=proxies,
            verify=False,
            headers=TWITTER_HEADERS,
        )
        response.raise_for_status()

        tweets = response.json()

        # A short or empty page means there are no further results.
        if len(tweets) < count:
            more_results = False

        for tweet in tweets:
            # Twitter API result set includes the tweet with the max_id/since_id
            # so ignore it.
            if tweet['id_str'] != max_id:
                post = Post(
                    author,
                    tweet['id_str'],
                    dateutil.parser.parse(tweet['created_at']),
                    tweet['text']
                )

                if tweet['lang'] is not None:
                    post.language = tweet['lang']

                if tweet['coordinates'] is not None:
                    # GeoJSON point: 'coordinates' is [longitude, latitude].
                    post.longitude, post.latitude = \
                        tweet['coordinates']['coordinates']

                place = tweet['place']

                if place is not None:
                    # Set longitude/latitude to the center of the bounding polygon.
                    total_lon = 0
                    total_lat = 0
                    num_coords = 0

                    for lon, lat in place['bounding_box']['coordinates'][0]:
                        total_lon += lon
                        total_lat += lat
                        num_coords += 1

                    post.longitude = total_lon / num_coords
                    post.latitude = total_lat / num_coords

                    # Set location to string identifying the place.
                    post.location = '{}, {}'.format(
                        place['full_name'],
                        place['country']
                    )

                db.add(post)
                db.flush()
                post_ids.append(post.id)
                # Set the max_id to the last tweet to get the next set of
                # results
                max_id = tweet['id_str']
                params['max_id'] = max_id
                results += 1
                worker.update_job(current=results)

                if results == max_results:
                    more_results = False
                    break

    db.commit()
    worker.finish_job()
    redis.publish('profile_posts', json.dumps({'id': id_}))
    app.queue.schedule_index_posts(post_ids)
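The place handling above reduces a GeoJSON bounding polygon to a single point by averaging its vertices. The same computation as a standalone function, with a worked example (bounding_box_centroid is a name introduced here, not part of the module):

def bounding_box_centroid(coordinates):
    # coordinates is a list of [longitude, latitude] pairs, e.g.
    # place['bounding_box']['coordinates'][0] in the Twitter payload.
    lons, lats = zip(*coordinates)
    return sum(lons) / len(lons), sum(lats) / len(lats)

# A rectangle roughly around New York City:
# bounding_box_centroid([[-74.0, 40.5], [-73.7, 40.5],
#                        [-73.7, 40.9], [-74.0, 40.9]])
# -> (-73.85, 40.7)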
Beispiel #29
0
import model.database
import model.equipment_type
import model.event
import model.machine
import model.pages
import model.person
import model.timeline
import model.timeslots

import pages.equipment_type_list_page
import pages.equipment_type_page
import pages.person_page

import utils.importer

genconf = configuration.get_config()
interest_areas = genconf['skill_areas']

evening_timeslots = timeslots.timeslots_to_int([[False, False, True]] * 7)
weekend_timeslots = timeslots.timeslots_to_int([[False, False, False]] * 5 +
                                               [[True, True, True]] * 2)
evening_and_weekend_timeslots = evening_timeslots | weekend_timeslots

print("evening_timeslots:", timeslots.timeslots_from_int(evening_timeslots))
print("weekend_timeslots:", timeslots.timeslots_from_int(weekend_timeslots))
print("evening_and_weekend_timeslots:",
      timeslots.timeslots_from_int(evening_and_weekend_timeslots))


def set_access_permissions_as_admin(access_permissions):
    access_permissions.add_role(
        'owner',
        configuration.get_config()['organization']['database'])
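The timeslot values above are integer bitmasks built from a 7-day grid of booleans (three slots per day) and combined with bitwise OR. The real timeslots module is not shown in this example; a plausible sketch of the packing and unpacking, for illustration only:

def timeslots_to_int(week):
    # Pack a 7-day grid of boolean slots into a single integer; the first
    # slot of the first day ends up in the highest bit.
    mask = 0
    for day in week:
        for slot in day:
            mask = (mask << 1) | int(slot)
    return mask


def timeslots_from_int(mask, days=7, slots_per_day=3):
    # Inverse of timeslots_to_int: unpack the bitmask into the grid.
    bits = [(mask >> i) & 1 == 1
            for i in reversed(range(days * slots_per_day))]
    return [bits[d * slots_per_day:(d + 1) * slots_per_day]
            for d in range(days)]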
Beispiel #30
0
def scrape_instagram_posts(id_, recent):
    '''
    Fetch Instagram posts for the user identified by id_.
    Posts already stored in the db are checked first, and only older or
    newer posts are fetched, depending on the boolean argument 'recent':
    recent=True fetches newer posts that are not yet stored in the db.
    The number of posts to fetch is configured in the Admin.
    '''
    redis = worker.get_redis()
    db = worker.get_session()
    author = db.query(Profile).filter(Profile.id==id_).first()
    proxies = _get_proxies(db)
    max_results = get_config(db, 'max_posts_instagram', required=True).value
    try:
        max_results = int(max_results)
    except (TypeError, ValueError):
        raise ScrapeException('Value of max_posts_instagram must be an integer')

    min_id = None
    more_results = True
    results = 0
    params = {}

    if author is None:
        raise ValueError('No profile exists with id={}'.format(id_))

    url = 'https://api.instagram.com/v1/users/{}/media/recent' \
          .format(author.upstream_id)

    # Get posts currently stored in db for this profile, newest first.
    post_query = db.query(Post) \
        .filter(Post.author_id == id_) \
        .order_by(Post.upstream_created.desc())

    if post_query.count() > 0:
        # Only fetch posts newer than those already stored in db
        if recent:
            min_id = post_query[0].upstream_id
            params['min_id'] = str(min_id)
        # Only fetch posts older than those already stored in db
        else:
            max_id = post_query[post_query.count() - 1].upstream_id
            params['max_id'] = str(max_id)

    worker.start_job(total=max_results)
    logging.debug('WORKER max results: %d', max_results)

    # Collect new post IDs across all pages so they can be indexed at the end.
    post_ids = []

    while results < max_results:
        response = requests.get(
            url,
            params=params,
            proxies=proxies,
            verify=False
        )

        response.raise_for_status()
        response_json = response.json()['data']
        pagination = response.json()['pagination']

        # Instagram API result includes post with min_id so remove it
        response_json[:] = [d for d in response_json if d.get('id') != min_id]

        for gram in response_json:
            if gram['caption'] is not None:
                text = gram['caption']['text']
            else:
                text = None

            post = Post(
                author,
                gram['id'],
                datetime.fromtimestamp(int(gram['created_time'])),
                text
            )

            if gram['location'] is not None:
                if 'latitude' in gram['location']:
                    post.latitude = gram['location']['latitude']
                    post.longitude = gram['location']['longitude']

                if 'name' in gram['location']:
                    post.location = gram['location']['name']

                    if 'street_address' in gram['location']:
                        post.location += ' ' + gram['location']['street_address']

            if 'images' in gram:
                image_url = gram['images']['standard_resolution']['url']
                name = os.path.basename(urlparse(image_url).path)
                img_response = requests.get(image_url, verify=False)
                img_response.raise_for_status()
                mime = img_response.headers['Content-type']
                image = img_response.content
                post.attachments.append(File(name, mime, image))

            db.add(post)
            db.flush()
            post_ids.append(post.id)
            results += 1
            worker.update_job(current=results)
            if results == max_results:
                break

        # If there are more results, set the max_id param, otherwise finish
        if 'next_max_id' in pagination:
            params['max_id'] = pagination['next_max_id']
        else:
            break

    db.commit()
    worker.finish_job()
    redis.publish('profile_posts', json.dumps({'id': id_}))
    app.queue.schedule_index_posts(post_ids)
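The image handling above downloads each post's standard-resolution image and wraps it in a File attachment. The fetch step as a standalone helper (fetch_attachment is a name introduced here; like the example, it passes verify=False, and it falls back to a generic MIME type if the header is missing):

import os
from urllib.parse import urlparse

import requests


def fetch_attachment(image_url):
    # Download a file and return (name, mime_type, bytes), the values the
    # example feeds to the File(...) constructor.
    response = requests.get(image_url, verify=False)
    response.raise_for_status()
    name = os.path.basename(urlparse(image_url).path)
    mime = response.headers.get('Content-type', 'application/octet-stream')
    return name, mime, response.content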