Beispiel #1
0
    def post(self, user, library):
        """
        HTTP POST request that adds a document to, or removes documents from,
        a library.

        The posted payload must carry an ``action`` key whose value is either
        ``'add'`` or ``'remove'``; the whole payload is handed to the matching
        helper as ``document_data``.

        :param user: user ID as given by the API (not used by this method)
        :param library: library ID

        :return: ``({}, 200)`` once the requested action has been carried out,
            ``({}, 400)`` if the action is missing or not recognised
        """
        data = get_post_data(request)

        # Use .get() so that a payload without an 'action' key is answered
        # with the same 400 as an unknown action instead of raising KeyError.
        action = data.get('action')

        if action == 'add':
            current_app.logger.info('User requested to add a document')
            self.add_document_to_library(library_id=library,
                                         document_data=data)
            return {}, 200

        if action == 'remove':
            current_app.logger.info('User requested to remove a document')
            self.remove_documents_from_library(library_id=library,
                                               document_data=data)
            return {}, 200

        current_app.logger.info('User requested a non-standard action')
        return {}, 400
Beispiel #2
0
    def post(self):
        """
        HTTP POST request that labels a user-supplied abstract with concepts
        or keywords, using the corpus stored in the application config.

        Post data
        ---------
        abstract: document's abstract

        Return data (on success)
        ------------------------
        concepts: <list> one list per item returned by the corpus prediction

        HTTP Responses:
        --------------
        Success: 200
        Any other responses will be default Flask errors
        """
        payload = get_post_data(request, types={'abstract': unicode})
        log = current_app.logger
        log.debug('Received data from user: {}'.format(payload))

        # Vectorise the single abstract, then ask the corpus for its labels
        model = current_app.config['CORPUS']
        features = model.vectorizer.transform([payload['abstract']])

        concepts = []
        for predicted in model.predict(features):
            concepts.append(list(predicted))

        log.debug('Found labels: {}'.format(concepts))

        return {'concepts': concepts}, 200
Beispiel #3
0
    def post(self):
        """
        HTTP POST request that forwards user feedback to the Slack end point.

        The request is rejected unless it carries a 'g-recaptcha-response'
        value that ``verify_recaptcha`` accepts.

        :return: decoded JSON body and status code from the slack end point,
            or the result of ``err(...)`` when the captcha is not verified or
            a required keyword is missing
        """

        feedback = get_post_data(request)
        logger = current_app.logger
        logger.info('Received feedback: {0}'.format(feedback))

        # verify_recaptcha is only called when a captcha response was posted
        captcha_ok = feedback.get('g-recaptcha-response', False) and \
            verify_recaptcha(request)
        if not captcha_ok:
            logger.info('The captcha was not verified!')
            return err(ERROR_UNVERIFIED_CAPTCHA)

        # NOTE(review): this line is reached only when the captcha check
        # passed, although the message says it was skipped - confirm intent.
        logger.info('Skipped captcha!')

        try:
            logger.info('Prettifiying post data: {0}'.format(feedback))
            slack_payload = json.dumps(self.prettify_post(feedback))
            logger.info('Data prettified: {0}'.format(slack_payload))
        except BadRequestKeyError as error:
            logger.error('Missing keywords: {0}, {1}'.format(error, feedback))
            return err(ERROR_MISSING_KEYWORDS)

        end_point = current_app.config['FEEDBACK_SLACK_END_POINT']
        slack_response = requests.post(url=end_point, data=slack_payload)

        return slack_response.json(), slack_response.status_code
Beispiel #4
0
    def post(self, user, library):
        """
        HTTP POST request that adds a document to, or removes documents from,
        a library.

        The posted payload must carry an ``action`` key whose value is either
        ``'add'`` or ``'remove'``; the whole payload is handed to the matching
        helper as ``document_data``.

        :param user: user ID as given by the API (not used by this method)
        :param library: library ID

        :return: ``({}, 200)`` once the requested action has been carried out,
            ``({}, 400)`` if the action is missing or not recognised
        """
        data = get_post_data(request)

        # Use .get() so that a payload without an 'action' key is answered
        # with the same 400 as an unknown action instead of raising KeyError.
        action = data.get('action')

        if action == 'add':
            current_app.logger.info('User requested to add a document')
            self.add_document_to_library(
                library_id=library,
                document_data=data
            )
            return {}, 200

        if action == 'remove':
            current_app.logger.info('User requested to remove a document')
            self.remove_documents_from_library(
                library_id=library,
                document_data=data
            )
            return {}, 200

        current_app.logger.info('User requested a non-standard action')
        return {}, 400
Beispiel #5
0
 def dispatch_request(self, **kwargs):
     """
     Read the posted data, process it with ``proc_data`` and wrap the
     outcome in an API response.

     post response format:
         {
             error_code: error_code(0 is success),
             data: data(may be null)
         }

     A ValueError raised while reading or processing the data is reported
     through ``error_code`` (taken from the exception) with ``data`` set to
     None.

     :param kwargs: passed through unchanged to ``proc_data``
     :return: whatever ``make_api_response`` returns
     """
     try:
         data = get_post_data()
         rv = self.proc_data(data, **kwargs)
         return make_api_response(error_code=SUCCESS, data=rv)
     except ValueError as e:
         # ``except ValueError, e`` and ``e.message`` only work on Python 2.
         # Prefer an existing ``message`` attribute; otherwise rebuild the
         # value Python 2 exposed there: the sole argument, or '' when the
         # exception was raised with zero or several arguments.
         fallback = e.args[0] if len(e.args) == 1 else ''
         error_code = getattr(e, 'message', fallback)
         return make_api_response(error_code=error_code, data=None)
Beispiel #6
0
def verify_recaptcha(request, ep=None):
    """
    Verify a google recaptcha based on the data contained in the request

    :param request: flask.request
    :param ep: google recaptcha endpoint; defaults to the
        GOOGLE_RECAPTCHA_ENDPOINT entry of the application config
    :type ep: basestring|None
    :return: True only if the endpoint's JSON reply has 'success' set to
        true, otherwise False
    :raises requests.exceptions.HTTPError: if the endpoint answers with an
        error status (raised by ``raise_for_status``)
    """
    if ep is None:
        ep = current_app.config['GOOGLE_RECAPTCHA_ENDPOINT']
    data = get_post_data(request)
    payload = {
        'secret': current_app.config['GOOGLE_RECAPTCHA_PRIVATE_KEY'],
        'remoteip': request.remote_addr,
        'response': data['g-recaptcha-response']
    }
    r = requests.post(ep, data=payload)
    r.raise_for_status()
    # Only a literal JSON ``true`` counts as verified; the identity test
    # already yields a bool, so no conditional expression is needed.
    return r.json()['success'] is True
    def post(self):
        """
        HTTP POST Request
        Return the 'riq' indicator for each of the two posted queries
        ('query1' and 'query2'). A query without any bibcodes reports 0.
        Aborts with 404 if either query key is missing from the post data.
        """

        post_data = get_post_data(request)

        # Build both searches up front so a missing key aborts before any
        # search results are consumed.
        try:
            searches = [
                ads.SearchQuery(q=post_data[key], fq='database:astronomy', fl=['id', 'bibcode'], rows=1000)
                for key in ('query1', 'query2')
            ]
        except KeyError:
            abort(404)

        # Collect the bibcodes of both searches before asking for metrics
        bibcode_lists = []
        for search in searches:
            bibcode_lists.append([paper.bibcode for paper in search])

        riqs = []
        for bibcodes in bibcode_lists:
            riq = 0
            if bibcodes:
                metrics = ads.MetricsQuery(bibcodes=bibcodes).execute()
                riq = metrics['indicators']['riq']
            riqs.append(riq)

        first_riq, second_riq = riqs
        response = {
            'author1': {'riq': first_riq},
            'author2': {'riq': second_riq},
        }

        return response, 200
Beispiel #8
0
    def post(self, user):
        """
        HTTP POST request that creates a library for a given user.

        A database entry for the user is created first if none exists yet.

        :param user: user ID as given by the API

        :return: name, id and description of the new library with status 200,
            or the duplicate-library-name error body with its status number
            when creating the library raises an IntegrityError
        """
        log = current_app.logger

        # Make sure the user is known to the service before going further
        log.info('Checking if the user exists')
        if self.user_exists(absolute_uid=user):
            log.info('User already exists.')
        else:
            log.info('User: {0:d}, does not exist.'.format(user))
            self.create_user(absolute_uid=user)
            log.info('User: {0:d}, created.'.format(user))

        # From here on work with the service UID rather than the API UID
        service_uid = self.absolute_uid_to_service_uid(absolute_uid=user)
        uid_message = 'user_API: {0:d} is now user_service: {1:d}'
        log.info(uid_message.format(user, service_uid))

        # Create the library from the posted data
        library_data = get_post_data(request)
        try:
            library = self.create_library(service_uid=service_uid,
                                          library_data=library_data)
        except IntegrityError:
            duplicate = DUPLICATE_LIBRARY_NAME_ERROR
            return {'error': duplicate['body']}, duplicate['number']

        created = {
            'name': library.name,
            'id': library.id,
            'description': library.description
        }
        return created, 200
Beispiel #9
0
    def post(self, user):
        """
        HTTP POST request that creates a library for a given user.

        The user is registered in the database first when not yet known.

        :param user: user ID as given by the API

        :return: name, id and description of the new library with status 200,
            or the duplicate-library-name error body with its status number
            when creating the library raises an IntegrityError
        """
        info = current_app.logger.info

        # Register the user on first contact
        info('Checking if the user exists')
        user_known = self.user_exists(absolute_uid=user)
        if user_known:
            info('User already exists.')
        else:
            info('User: {0:d}, does not exist.'.format(user))
            self.create_user(absolute_uid=user)
            info('User: {0:d}, created.'.format(user))

        # Translate the API UID into the UID used inside this service
        service_uid = self.absolute_uid_to_service_uid(absolute_uid=user)
        info(
            'user_API: {0:d} is now user_service: {1:d}'.format(
                user, service_uid
            )
        )

        # Create the library from the posted data
        posted = get_post_data(request)
        try:
            library = self.create_library(
                service_uid=service_uid,
                library_data=posted
            )
        except IntegrityError:
            error_body = {'error': DUPLICATE_LIBRARY_NAME_ERROR['body']}
            return error_body, DUPLICATE_LIBRARY_NAME_ERROR['number']

        summary = {'name': library.name,
                   'id': library.id,
                   'description': library.description}
        return summary, 200
Beispiel #10
0
    def post(self):
        """
        HTTP POST request that receives the user's ADS 2.0 credentials, and
        then contacts the Classic system to check that what the user provided
        is indeed valid. If valid, the user's ADS 2.0 e-mail is stored
        against their user ID.

        Post body:
        ----------
        KEYWORD, VALUE
        twopointoh_email: <string> ADS 2.0 e-mail of the user
        twopointoh_password: <string> ADS 2.0 password of the user

        Return data (on success):
        -------------------------
        twopointoh_authed: <boolean> were they authenticated
        twopointoh_email: <string> e-mail that authenticated correctly

        HTTP Responses:
        --------------
        Succeed authentication: 200
        Bad/malformed data: 400
        User unknown/wrong password/failed authentication: 404
        ADS Classic give unknown messages: 500
        ADS Classic times out: 504

        Any other responses will be default Flask errors
        """
        post_data = get_post_data(request)

        # Collect the username, password from the request
        try:
            twopointoh_email = post_data['twopointoh_email']
            twopointoh_password = post_data['twopointoh_password']
        except KeyError:
            # format_exc() returns the traceback as text; print_exc() writes
            # it to stderr and returns None, which logged the word "None".
            current_app.logger.warning(
                'User did not provide a required key: {}'
                .format(traceback.format_exc())
            )
            return err(CLASSIC_DATA_MALFORMED)

        # Create the correct URL
        url = current_app.config['ADS_CLASSIC_URL'].format(
            mirror=current_app.config['ADS_TWO_POINT_OH_MIRROR'],
        )
        params = {
            'man_cmd': 'elogin',
            'man_email': twopointoh_email,
            'man_passwd': twopointoh_password
        }

        # Authenticate
        current_app.logger.info(
            'User "{email}" trying to authenticate"'
            .format(email=twopointoh_email)
        )
        # NOTE(review): no timeout is passed here, so whether the Timeout
        # handler below can fire depends on defaults outside this method.
        try:
            response = requests.post(
                url,
                params=params
            )
        except requests.exceptions.Timeout:
            current_app.logger.warning(
                'ADS Classic end point timed out, returning to user'
            )
            return err(CLASSIC_TIMEOUT)

        if response.status_code >= 500:
            # Pass the upstream message and status on to the caller
            message, status_code = err(CLASSIC_UNKNOWN_ERROR)
            message['ads_classic'] = {
                'message': response.text,
                'status_code': response.status_code
            }
            current_app.logger.warning(
                'ADS Classic has responded with an unknown error: {}'
                .format(response.text)
            )
            return message, status_code

        # Decode the body once and reuse it for every check below
        classic_reply = response.json()

        # Sanity check the response
        email = classic_reply['email']
        if email != twopointoh_email:
            current_app.logger.warning(
                'User email "{}" does not match ADS return email "{}"'
                .format(twopointoh_email, email)
            )
            return err(CLASSIC_AUTH_FAILED)

        # Respond to the user based on whether they were successful or not
        if response.status_code == 200 \
                and classic_reply['message'] == 'LOGGED_IN' \
                and int(classic_reply['loggedin']):
            current_app.logger.info(
                'Authenticated successfully "{email}"'
                .format(email=twopointoh_email)
            )

            # Update the existing user row, or create one on first login
            absolute_uid = self.helper_get_user_id()
            try:
                user = Users.query.filter(
                    Users.absolute_uid == absolute_uid
                ).one()

                current_app.logger.info('User already exists in database')
                user.twopointoh_email = twopointoh_email
            except NoResultFound:
                current_app.logger.info('Creating entry in database for user')
                user = Users(
                    absolute_uid=absolute_uid,
                    twopointoh_email=twopointoh_email
                )

            db.session.add(user)
            db.session.commit()

            current_app.logger.info(
                'Successfully saved content for "{}" to database'
                .format(twopointoh_email)
            )

            return {
                'twopointoh_email': email,
                'twopointoh_authed': True
            }, 200
        else:
            current_app.logger.warning(
                'ADS 2.0 credentials for "{email}" did not succeed"'
                .format(email=twopointoh_email)
            )
            return err(CLASSIC_AUTH_FAILED)
Beispiel #11
0
    def post(self):
        """
        HTTP POST request that receives the user's ADS Classic credentials, and
        then contacts the Classic system to check that what the user provided is
        indeed valid. If valid, the user's Classic e-mail, mirror and cookie
        are stored against their user ID.

        Post body:
        ----------
        KEYWORD, VALUE
        classic_email: <string> ADS Classic e-mail of the user
        classic_password: <string> ADS Classic password of the user
        classic_mirror: <string> ADS Classic mirror this user belongs to

        Return data (on success):
        -------------------------
        classic_authed: <boolean> were they authenticated
        classic_email: <string> e-mail that authenticated correctly
        classic_mirror: <string> ADS Classic mirror this user selected

        HTTP Responses:
        --------------
        Succeed authentication: 200
        Bad/malformed data: 400
        User unknown/wrong password/failed authentication: 404
        ADS Classic give unknown messages: 500
        ADS Classic times out: 504

        Any other responses will be default Flask errors
        """
        post_data = get_post_data(request)

        # Collect the username, password from the request
        try:
            classic_email = post_data['classic_email']
            classic_password = post_data['classic_password']
            classic_mirror = post_data['classic_mirror']
        except KeyError:
            # format_exc() returns the traceback as text; print_exc() writes
            # it to stderr and returns None, which logged the word "None".
            current_app.logger.warning(
                'User did not provide a required key: {}'
                .format(traceback.format_exc())
            )
            return err(CLASSIC_DATA_MALFORMED)

        # Check that the mirror exists and not man-in-the-middle
        if classic_mirror not in current_app.config['ADS_CLASSIC_MIRROR_LIST']:
            current_app.logger.warning(
                'User "{}" tried to use a mirror that does not exist: "{}"'
                .format(classic_email, classic_mirror)
            )
            return err(CLASSIC_BAD_MIRROR)

        # Create the correct URL
        url = current_app.config['ADS_CLASSIC_URL'].format(
            mirror=classic_mirror,
        )
        params = {
            'man_cmd': 'elogin',
            'man_email': classic_email,
            'man_passwd': classic_password
        }

        # Authenticate
        current_app.logger.info(
            'User "{email}" trying to authenticate at mirror "{mirror}"'
            .format(email=classic_email, mirror=classic_mirror)
        )
        # NOTE(review): no timeout is passed here, so whether the Timeout
        # handler below can fire depends on defaults outside this method.
        try:
            response = requests.post(
                url,
                params=params
            )
        except requests.exceptions.Timeout:
            current_app.logger.warning(
                'ADS Classic end point timed out, returning to user'
            )
            return err(CLASSIC_TIMEOUT)

        if response.status_code >= 500:
            # Pass the upstream message and status on to the caller
            message, status_code = err(CLASSIC_UNKNOWN_ERROR)
            message['ads_classic'] = {
                'message': response.text,
                'status_code': response.status_code
            }
            current_app.logger.warning(
                'ADS Classic has responded with an unknown error: {}'
                .format(response.text)
            )
            return message, status_code

        # Decode the body once and reuse it for every check below
        classic_reply = response.json()

        # Sanity check the response
        email = classic_reply['email']
        if email != classic_email:
            current_app.logger.warning(
                'User email "{}" does not match ADS return email "{}"'
                .format(classic_email, email)
            )
            return err(CLASSIC_AUTH_FAILED)

        # Respond to the user based on whether they were successful or not
        if response.status_code == 200 \
                and classic_reply['message'] == 'LOGGED_IN' \
                and int(classic_reply['loggedin']):
            current_app.logger.info(
                'Authenticated successfully "{email}" at mirror "{mirror}"'
                .format(email=classic_email, mirror=classic_mirror)
            )

            # Save cookie in myADS
            try:
                cookie = classic_reply['cookie']
            except KeyError:
                current_app.logger.warning(
                    'Classic returned no cookie, cannot continue: {}'
                    .format(classic_reply)
                )
                return err(CLASSIC_NO_COOKIE)

            # Update the existing user row, or create one on first login
            absolute_uid = self.helper_get_user_id()
            try:
                user = Users.query.filter(
                    Users.absolute_uid == absolute_uid
                ).one()

                current_app.logger.info('User already exists in database')
                user.classic_mirror = classic_mirror
                user.classic_cookie = cookie
                user.classic_email = classic_email
            except NoResultFound:
                current_app.logger.info('Creating entry in database for user')
                user = Users(
                    absolute_uid=absolute_uid,
                    classic_cookie=cookie,
                    classic_email=classic_email,
                    classic_mirror=classic_mirror
                )

            db.session.add(user)
            db.session.commit()

            # The cookie is masked with '*' so its value never reaches the log
            current_app.logger.info(
                'Successfully saved content for "{}" to database: {{"cookie": "{}"}}'
                .format(classic_email, '*'*len(user.classic_cookie))
            )

            return {
                'classic_email': email,
                'classic_mirror': classic_mirror,
                'classic_authed': True
            }, 200
        else:
            current_app.logger.warning(
                'Credentials for "{email}" did not succeed at mirror "{mirror}"'
                .format(email=classic_email, mirror=classic_mirror)
            )
            return err(CLASSIC_AUTH_FAILED)
Beispiel #12
0
def main():
    """
    Count the named entities found in the comments of a Facebook page's posts.

    Reads the configuration file given with -c/--conf, asks for a language,
    loads the spaCy model the configuration names for that language, fetches
    the page's posts through the Graph API, gathers their comments, extracts
    and counts entities, then writes the counts to a TSV file and a bar plot.

    Exits with status 1 on an unsupported language, a model that cannot be
    loaded, a non-numeric post count, an invalid configuration file or a
    failed Graph API login. Exits with status 0 when no comments were found.
    """
    parser = argparse.ArgumentParser(
        description="""Count comments in a given number of posts""")
    parser.add_argument(
        '-c', '--conf', type=str, metavar='', required=True,
        help='Specify the path of the configuration file')
    args = parser.parse_args()
    config_path = args.conf
    start = time.time()
    logger = get_logger(__name__)
    logger.setLevel(logging.DEBUG)
    conf = load_config(config_path)
    supported_languages = ["it", "en"]
    lang = input("Insert language (it, en): ")
    if lang not in supported_languages:
        logger.error("Please provide a valid language. Supported: 'en', 'it'")
        sys.exit(1)
    try:
        model = conf.get(lang)
        nlp = spacy.load(model)
    except OSError:
        logger.error("Could not find model in conf file. Please double check")
        # Failures exit non-zero so calling scripts can detect them
        sys.exit(1)
    n_posts = check_n_posts()
    if not n_posts.isdigit() and n_posts != "-1":
        logger.error("Please give a number. Exiting")
        sys.exit(1)
    try:
        access_token = conf["access_token"]
        page_id = conf["page_id"]
        n_top_entities = conf["n_top_entities"]
        data_dir_path = os.path.join(page_id, conf["data_dir_name"])
        data_filename = "{}_{}.tsv".format(conf["data_entities_prefix"], str(n_posts))
        plots_dir_path = os.path.join(page_id, conf["plots_dir_name"])
        barplot_filename = "{}_{}posts_ner.png".format(conf["barplot_filename"], str(n_posts))
        barplot_filepath = os.path.join(plots_dir_path, barplot_filename)
    except KeyError:
        logger.error(
            "Invalid configuration file. Please check template and retry")
        sys.exit(1)
    try:
        graph = facebook.GraphAPI(access_token)
        logger.info("Graph API connected")
        profile = graph.get_object(page_id)
    except facebook.GraphAPIError as e:
        logger.error("Could not log in. {}".format(e))
        sys.exit(1)
    # NOTE(review): an empty n_posts is already rejected by the number check
    # above, so the "no limits" branch looks unreachable - confirm intent.
    if n_posts != "":
        logger.info("Getting the last {} posts".format(n_posts))
    else:
        logger.warning(
            "Requesting posts with no limits. "
            "This could be susceptible of limitations"
            " in the near future due to high rate"
        )
    local_start = time.time()
    posts = graph.get_connections(profile["id"], "posts", limit=n_posts)
    comments = []
    for post in posts["data"]:
        url_post = "https://www.facebook.com/posts/{}".format(post["id"])
        logger.info("Getting data for post {}".format(url_post))
        post_data = get_post_data(access_token, post["id"])
        post_comments = get_comments(post_data)
        if len(post_comments) == 0:
            logger.warning(
                """Apparently, there are no comments at the selected post
                Check the actual post on its Facebook page 
                https://www.facebook.com/posts/{}""".format(post["id"])
            )
        comments.extend(post_comments)
    if not comments:
        # Nothing to analyse is reported but deliberately left at status 0,
        # matching the "gracefully" wording of the message.
        logger.error("Could not get any comments. Exiting gracefully")
        sys.exit(0)
    elif len(comments) < 100:
        logger.warning(
            "Found {} comment(s). Not enough data "
            "to make much sense. Plots will be made regardless".format(
                len(comments)
            )
        )
    else:
        logger.info("Got {} comments from {} post(s) in {} seconds".format(
            len(comments), len(posts["data"]), round((time.time() - local_start), 1)))
    local_start = time.time()
    entities = []
    for comment in comments:
        entities.extend(get_entities(nlp, comment))
    logger.info("Extracted {} entities out of {} comments in {} seconds".format(
        len(entities), len(comments), round((time.time() - local_start), 2)))
    entities_data = count_entities(entities)
    create_nonexistent_dir(data_dir_path)
    data_filepath = os.path.join(data_dir_path, data_filename)
    columns = ["entities", "count"]
    data_to_tsv(entities_data, columns, data_filepath)
    logger.info("Saved {} unique entities and their counts in {} ".format(
        len(entities_data), data_filepath))
    create_nonexistent_dir(plots_dir_path)
    plot_labels = ["Entities", "Counts"]
    save_barplot(entities_data, plot_labels, n_top_entities, barplot_filepath, type_="entities")
    logger.info("Bar plot saved at {}".format(barplot_filepath))
    logger.info("\a\a\aDIN DONE! in {} seconds".format(
        round((time.time() - start), 1)))
Beispiel #13
0
    def post(self):
        """
        HTTP POST request that receives the user's ADS 2.0 credentials, and
        then contacts the Classic system to check that what the user provided
        is indeed valid. If valid, the user's ADS 2.0 e-mail is stored
        against their user ID.

        Post body:
        ----------
        KEYWORD, VALUE
        twopointoh_email: <string> ADS 2.0 e-mail of the user
        twopointoh_password: <string> ADS 2.0 password of the user

        Return data (on success):
        -------------------------
        twopointoh_authed: <boolean> were they authenticated
        twopointoh_email: <string> e-mail that authenticated correctly

        HTTP Responses:
        --------------
        Succeed authentication: 200
        Bad/malformed data: 400
        User unknown/wrong password/failed authentication: 404
        ADS Classic give unknown messages: 500
        ADS Classic times out: 504

        Any other responses will be default Flask errors
        """
        post_data = get_post_data(request)

        # Collect the username, password from the request
        try:
            twopointoh_email = post_data['twopointoh_email']
            twopointoh_password = post_data['twopointoh_password']
        except KeyError:
            # format_exc() returns the traceback as text; print_exc() writes
            # it to stderr and returns None, which logged the word "None".
            current_app.logger.warning(
                'User did not provide a required key: {}'.format(
                    traceback.format_exc()))
            return err(CLASSIC_DATA_MALFORMED)

        # Create the correct URL
        url = current_app.config['ADS_CLASSIC_URL'].format(
            mirror=current_app.config['ADS_TWO_POINT_OH_MIRROR'], )
        params = {
            'man_cmd': 'elogin',
            'man_email': twopointoh_email,
            'man_passwd': twopointoh_password
        }

        # Authenticate
        current_app.logger.info(
            'User "{email}" trying to authenticate"'.format(
                email=twopointoh_email))
        try:
            response = current_app.client.post(url, params=params)
        except requests.exceptions.Timeout:
            current_app.logger.warning(
                'ADS Classic end point timed out, returning to user')
            return err(CLASSIC_TIMEOUT)

        if response.status_code >= 500:
            # Pass the upstream message and status on to the caller
            message, status_code = err(CLASSIC_UNKNOWN_ERROR)
            message['ads_classic'] = {
                'message': response.text,
                'status_code': response.status_code
            }
            current_app.logger.warning(
                'ADS Classic has responded with an unknown error: {}'.format(
                    response.text))
            return message, status_code

        # Decode the body once and reuse it for every check below
        classic_reply = response.json()

        # Sanity check the response
        email = classic_reply['email']
        if email != twopointoh_email:
            current_app.logger.warning(
                'User email "{}" does not match ADS return email "{}"'.format(
                    twopointoh_email, email))
            return err(CLASSIC_AUTH_FAILED)

        # Respond to the user based on whether they were successful or not
        if response.status_code == 200 \
                and classic_reply['message'] == 'LOGGED_IN' \
                and int(classic_reply['loggedin']):
            current_app.logger.info(
                'Authenticated successfully "{email}"'.format(
                    email=twopointoh_email))

            absolute_uid = self.helper_get_user_id()
            with current_app.session_scope() as session:
                # Update the existing user row, or add one on first login
                try:
                    user = session.query(Users).filter(
                        Users.absolute_uid == absolute_uid).one()

                    current_app.logger.info('User already exists in database')
                    user.twopointoh_email = twopointoh_email
                except NoResultFound:
                    current_app.logger.info(
                        'Creating entry in database for user')
                    user = Users(absolute_uid=absolute_uid,
                                 twopointoh_email=twopointoh_email)
                    session.add(user)
                session.commit()

                current_app.logger.info(
                    'Successfully saved content for "{}" to database'.format(
                        twopointoh_email))

                return {
                    'twopointoh_email': email,
                    'twopointoh_authed': True
                }, 200
            # Only reached if the session_scope context manager suppresses
            # an exception raised inside the block above.
            return err(HARBOUR_SERVICE_FAIL)
        else:
            current_app.logger.warning(
                'ADS 2.0 credentials for "{email}" did not succeed"'.format(
                    email=twopointoh_email))
            return err(CLASSIC_AUTH_FAILED)
Beispiel #14
0
    def post(self):
        """
        HTTP POST request that receives the user's ADS Classic credentials, and
        then contacts the Classic system to check that what the user provided is
        indeed valid. If valid, the user's Classic e-mail, mirror and cookie
        are stored against their user ID.

        Post body:
        ----------
        KEYWORD, VALUE
        classic_email: <string> ADS Classic e-mail of the user
        classic_password: <string> ADS Classic password of the user
        classic_mirror: <string> ADS Classic mirror this user belongs to

        Return data (on success):
        -------------------------
        classic_authed: <boolean> were they authenticated
        classic_email: <string> e-mail that authenticated correctly
        classic_mirror: <string> ADS Classic mirror this user selected

        HTTP Responses:
        --------------
        Succeed authentication: 200
        Bad/malformed data: 400
        User unknown/wrong password/failed authentication: 404
        ADS Classic give unknown messages: 500
        ADS Classic times out: 504

        Any other responses will be default Flask errors
        """
        post_data = get_post_data(request)
        # NOTE(review): the session stays open for the whole request,
        # including the call to ADS Classic - confirm this is intended.
        with current_app.session_scope() as session:
            # Collect the username, password from the request
            try:
                classic_email = post_data['classic_email']
                classic_password = post_data['classic_password']
                classic_mirror = post_data['classic_mirror']
            except KeyError:
                # format_exc() returns the traceback as text; print_exc()
                # writes it to stderr and returns None, which logged "None".
                current_app.logger.warning(
                    'User did not provide a required key: {}'.format(
                        traceback.format_exc()))
                return err(CLASSIC_DATA_MALFORMED)

            # Check that the mirror exists and not man-in-the-middle
            if classic_mirror not in current_app.config[
                    'ADS_CLASSIC_MIRROR_LIST']:
                current_app.logger.warning(
                    'User "{}" tried to use a mirror that does not exist: "{}"'
                    .format(classic_email, classic_mirror))
                return err(CLASSIC_BAD_MIRROR)

            # Create the correct URL
            url = current_app.config['ADS_CLASSIC_URL'].format(
                mirror=classic_mirror, )
            params = {
                'man_cmd': 'elogin',
                'man_email': classic_email,
                'man_passwd': classic_password
            }

            # Authenticate
            current_app.logger.info(
                'User "{email}" trying to authenticate at mirror "{mirror}"'.
                format(email=classic_email, mirror=classic_mirror))
            try:
                response = current_app.client.post(url, params=params)
            except requests.exceptions.Timeout:
                current_app.logger.warning(
                    'ADS Classic end point timed out, returning to user')
                return err(CLASSIC_TIMEOUT)

            if response.status_code >= 500:
                # Pass the upstream message and status on to the caller
                message, status_code = err(CLASSIC_UNKNOWN_ERROR)
                message['ads_classic'] = {
                    'message': response.text,
                    'status_code': response.status_code
                }
                current_app.logger.warning(
                    'ADS Classic has responded with an unknown error: {}'.
                    format(response.text))
                return message, status_code

            # Decode the body once and reuse it for every check below
            classic_reply = response.json()

            # Sanity check the response
            email = classic_reply['email']
            if email != classic_email:
                current_app.logger.warning(
                    'User email "{}" does not match ADS return email "{}"'.
                    format(classic_email, email))
                return err(CLASSIC_AUTH_FAILED)

            # Respond to the user based on whether they were successful or not
            if response.status_code == 200 \
                    and classic_reply['message'] == 'LOGGED_IN' \
                    and int(classic_reply['loggedin']):
                current_app.logger.info(
                    'Authenticated successfully "{email}" at mirror "{mirror}"'
                    .format(email=classic_email, mirror=classic_mirror))

                # Save cookie in myADS
                try:
                    cookie = classic_reply['cookie']
                except KeyError:
                    current_app.logger.warning(
                        'Classic returned no cookie, cannot continue: {}'.
                        format(classic_reply))
                    return err(CLASSIC_NO_COOKIE)

                # Update the existing user row, or add one on first login
                absolute_uid = self.helper_get_user_id()
                try:
                    user = session.query(Users).filter(
                        Users.absolute_uid == absolute_uid).one()

                    current_app.logger.info('User already exists in database')
                    user.classic_mirror = classic_mirror
                    user.classic_cookie = cookie
                    user.classic_email = classic_email
                except NoResultFound:
                    current_app.logger.info(
                        'Creating entry in database for user')
                    user = Users(absolute_uid=absolute_uid,
                                 classic_cookie=cookie,
                                 classic_email=classic_email,
                                 classic_mirror=classic_mirror)

                    session.add(user)

                session.commit()
                # The cookie is masked with '*' so its value is never logged
                current_app.logger.info(
                    'Successfully saved content for "{}" to database: {{"cookie": "{}"}}'
                    .format(classic_email, '*' * len(user.classic_cookie)))

                return {
                    'classic_email': email,
                    'classic_mirror': classic_mirror,
                    'classic_authed': True
                }, 200

            else:
                current_app.logger.warning(
                    'Credentials for "{email}" did not succeed at mirror "{mirror}"'
                    .format(email=classic_email, mirror=classic_mirror))
                return err(CLASSIC_AUTH_FAILED)
Beispiel #15
0
def main():
    """Build word-count data and plots from the comments of a page's posts.

    Steps, in order:
        1. Parse ``-c/--conf`` and load the configuration with
           ``load_config``.
        2. Obtain the number of posts from ``check_n_posts()``; it must be
           made of digits only, or be the literal ``"-1"``.
        3. Connect to the Facebook Graph API, fetch the posts of the
           configured page and collect the comments of each post through
           ``get_post_data`` / ``get_comments``.
        4. Preprocess the comments, count the words, hand the counts to
           ``data_to_tsv`` and produce a bar plot and a word-cloud plot.

    Output locations are derived from the configuration keys
    ``page_id``, ``data_dir_name``, ``data_wc_prefix``, ``plots_dir_name``,
    ``wc_plot_filename`` and ``barplot_filename``.

    Every failure path (invalid post count, missing configuration key,
    Graph API error, no comments at all) logs an error and terminates the
    process with exit status 1, so that a calling shell or scheduler can
    tell a failed run from a successful one.
    """
    parser = argparse.ArgumentParser(
        description="Count comments in a given number of posts")
    parser.add_argument('-c',
                        '--conf',
                        type=str,
                        metavar='',
                        required=True,
                        help='Specify the path of the configuration file')
    config_path = parser.parse_args().conf

    # The overall timer starts once the command line has been parsed.
    start = time.time()
    logger = get_logger(__name__)
    logger.setLevel(logging.DEBUG)
    conf = load_config(config_path)

    n_posts = check_n_posts()
    if not n_posts.isdigit() and n_posts != "-1":
        logger.error("Please give a number. Exiting")
        sys.exit(1)

    # Resolve every configuration value up front so that a missing key is
    # reported before any network call is made.
    try:
        access_token = conf["access_token"]
        page_id = conf["page_id"]
        n_top_words = conf["n_top_words"]
        data_dir_path = os.path.join(page_id, conf["data_dir_name"])
        data_filename = "{}_{}.tsv".format(conf["data_wc_prefix"], n_posts)
        plots_dir_path = os.path.join(page_id, conf["plots_dir_name"])
        wc_plot_filepath = os.path.join(
            plots_dir_path,
            "{}_{}posts.png".format(conf["wc_plot_filename"], n_posts))
        barplot_filepath = os.path.join(
            plots_dir_path,
            "{}_{}posts.png".format(conf["barplot_filename"], n_posts))
    except KeyError:
        logger.error(
            "Invalid configuration file. Please check template and retry")
        sys.exit(1)

    try:
        graph = facebook.GraphAPI(access_token)
        logger.info("Graph API connected")
        profile = graph.get_object(page_id)
    except facebook.GraphAPIError as e:
        logger.error("Could not log in. {}".format(e))
        sys.exit(1)

    # Collect the comments of every returned post.
    local_start = time.time()
    posts = graph.get_connections(profile["id"], "posts", limit=n_posts)
    comments = []
    for post in posts["data"]:
        url_post = "https://www.facebook.com/posts/{}".format(post["id"])
        logger.info("Getting data for post {}".format(url_post))
        post_comments = get_comments(get_post_data(access_token, post["id"]))
        if len(post_comments) == 0:
            # The embedded newlines and indentation are part of the logged
            # message and are spelled out explicitly here.
            logger.warning(
                "Apparently, there are no comments at the selected post\n"
                "                Check the actual post on its Facebook page \n"
                "                https://www.facebook.com/posts/{}".format(
                    post["id"]))
        comments.extend(post_comments)

    if not comments:
        logger.error("Could not get any comments. Exiting gracefully")
        sys.exit(1)
    if len(comments) < 100:
        logger.warning(
            "Found {} comment(s). Not enough data "
            "to make much sense. Plots will be made regardless".format(
                len(comments)))
    else:
        logger.info("Got {} comments from {} post(s) in {} seconds".format(
            len(comments), len(posts["data"]),
            round((time.time() - local_start), 1)))

    # Word count on the fully preprocessed comments.
    local_start = time.time()
    preprocessed_comments = [
        TextPreprocessor(comm).preprocess() for comm in comments
    ]
    logger.info("Preprocessed {} comments out of {} in {} seconds".format(
        len(preprocessed_comments), len(comments),
        round((time.time() - local_start), 2)))
    wordcount_data = do_wordcount(preprocessed_comments)

    create_nonexistent_dir(data_dir_path)
    data_filepath = os.path.join(data_dir_path, data_filename)
    data_to_tsv(wordcount_data, ["word", "count"], data_filepath)
    logger.info("Saved {} words and their counts in {} ".format(
        len(wordcount_data), data_filepath))

    create_nonexistent_dir(plots_dir_path)
    save_barplot(wordcount_data, ["Words", "Counts"], n_top_words,
                 barplot_filepath)
    logger.info("Bar plot saved at {}".format(barplot_filepath))

    # The word cloud is built from the output of base_preprocess(), joined
    # into one long string.
    long_string = " ".join(
        TextPreprocessor(comm).base_preprocess() for comm in comments)
    Plotter(long_string).save_wordcloud_plot(wc_plot_filepath)
    logger.info("Wordcloud plot saved at {}".format(wc_plot_filepath))
    logger.info("\a\a\aDIN DONE! in {} seconds".format(
        round((time.time() - start), 1)))
Beispiel #16
0
def main():
    """Build word-count data and plots from the comments of a single post.

    Steps, in order:
        1. Parse ``-c/--conf`` and load the configuration with
           ``load_config``.
        2. Prompt on standard input until a non-empty post ID is given.
        3. Fetch the post identified by ``<page_id>_<post_id>`` through
           ``get_post_data`` and extract its comments with ``get_comments``.
        4. Preprocess the comments, count the words, hand the counts to
           ``data_to_tsv`` and produce a bar plot and a word-cloud plot.

    Output locations are derived from the configuration keys
    ``page_id``, ``data_dir_name``, ``data_wc_prefix``, ``plots_dir_name``,
    ``wc_plot_filename`` and ``barplot_filename``.

    Every failure path (missing configuration key, post without comments)
    logs an error and terminates the process with exit status 1, so that a
    calling shell or scheduler can tell a failed run from a successful one.
    """
    parser = argparse.ArgumentParser(
        description="Count comments in a given post")
    parser.add_argument('-c',
                        '--conf',
                        type=str,
                        metavar='',
                        required=True,
                        help='Specify the path of the configuration file')
    config_path = parser.parse_args().conf

    # The overall timer starts once the command line has been parsed.
    start = time.time()
    logger = get_logger(__name__)
    logger.setLevel(logging.DEBUG)
    conf = load_config(config_path)

    # Keep asking until the user types something.
    post_id = ""
    while not post_id:
        post_id = input("Provide post ID: ")

    # Resolve every configuration value up front so that a missing key is
    # reported before any network call is made.
    try:
        access_token = conf["access_token"]
        page_id = conf["page_id"]
        n_top_words = conf["n_top_words"]
        data_dir_path = os.path.join(page_id, conf["data_dir_name"])
        # NOTE(review): the file is named ".csv" although it is written by
        # data_to_tsv below -- confirm the extension is intended.
        data_filename = "{}_{}.csv".format(conf["data_wc_prefix"], post_id)
        plots_dir_path = os.path.join(page_id, conf["plots_dir_name"],
                                      "single_posts", post_id)
        wc_plot_filepath = os.path.join(
            plots_dir_path,
            "{}_{}.png".format(conf["wc_plot_filename"], post_id))
        barplot_filepath = os.path.join(
            plots_dir_path,
            "{}_{}.png".format(conf["barplot_filename"], post_id))
    except KeyError:
        logger.error(
            "Invalid configuration file. Please check template and retry")
        sys.exit(1)

    url_post = "https://www.facebook.com/posts/{}".format(post_id)
    logger.info("Getting data for post {}".format(url_post))

    # The post is requested with the page id prefixed to the post id.
    actual_post_id = page_id + "_" + post_id
    local_start = time.time()
    comments = get_comments(get_post_data(access_token, actual_post_id))

    if len(comments) == 0:
        # The embedded newlines and indentation are part of the logged
        # message and are spelled out explicitly here.
        logger.error(
            "Apparently, there are no comments at the selected post\n"
            "            Check the actual post on its Facebook page \n"
            "            https://www.facebook.com/{}/posts/{}".format(
                page_id, post_id))
        sys.exit(1)
    if len(comments) < 100:
        logger.warning(
            "Got {} comments. Not enough data "
            "to make much sense. Plots will be made regardless".format(
                len(comments)))
    else:
        logger.info("Got {} comments in {} seconds".format(
            len(comments), round((time.time() - local_start), 2)))

    # Word count on the fully preprocessed comments.
    local_start = time.time()
    preprocessed_comments = [
        TextPreprocessor(comm).preprocess() for comm in comments
    ]
    logger.info("Preprocessed {} comments out of {} in {} seconds".format(
        len(preprocessed_comments), len(comments),
        round((time.time() - local_start), 1)))
    logger.info("Performing word count")
    wordcount_data = do_wordcount(preprocessed_comments)

    create_nonexistent_dir(data_dir_path)
    data_filepath = os.path.join(data_dir_path, data_filename)
    data_to_tsv(wordcount_data, ["word", "count"], data_filepath)
    logger.info("Saved {} words and their counts in {} ".format(
        len(wordcount_data), data_filepath))

    create_nonexistent_dir(plots_dir_path)
    save_barplot(wordcount_data, ["Words", "Counts"], n_top_words,
                 barplot_filepath)
    logger.info("Bar plot saved at {}".format(barplot_filepath))

    # The word cloud is built from the output of base_preprocess(), joined
    # into one long string.
    long_string = " ".join(
        TextPreprocessor(comm).base_preprocess() for comm in comments)
    Plotter(long_string).save_wordcloud_plot(wc_plot_filepath)
    logger.info("Word Cloud plot saved at {}".format(wc_plot_filepath))
    logger.info("\a\a\aDIN DONE!")
    logger.info("Total time of execution: {} seconds".format(
        round((time.time() - start), 1)))
Beispiel #17
0
def main():
    """Collect the comments of a page's posts into a file for labelling.

    Steps, in order:
        1. Parse ``-c/--conf`` and load the configuration with
           ``load_config``.
        2. Obtain the number of posts from ``check_n_posts()``; it must be
           made of digits only.
        3. Connect to the Facebook Graph API, fetch the posts of the
           configured page and collect the comments of each post through
           ``get_post_data`` / ``get_comments``.
        4. Pair every comment with the value ``0`` under the columns
           ``comment`` / ``sentiment`` and hand the rows to ``data_to_tsv``.
           The output file is ``<page_id>/<data_dir_name>/<N>_comments.tsv``
           where N is the number of comments.

    All configuration keys (``access_token``, ``page_id``,
    ``data_dir_name``) are validated before any network call, so a broken
    configuration file is reported immediately instead of surfacing as an
    unhandled ``KeyError`` after the download has finished.

    Every failure path (invalid post count, missing configuration key,
    Graph API error, no comments at all) logs an error and terminates the
    process with exit status 1, so that a calling shell or scheduler can
    tell a failed run from a successful one.
    """
    parser = argparse.ArgumentParser(
        description="Count comments in a given number of posts")
    parser.add_argument('-c',
                        '--conf',
                        type=str,
                        metavar='',
                        required=True,
                        help='Specify the path of the configuration file')
    config_path = parser.parse_args().conf

    # The overall timer starts once the command line has been parsed.
    start = time.time()
    logger = get_logger(__name__)
    logger.setLevel(logging.DEBUG)
    conf = load_config(config_path)

    n_posts = check_n_posts()
    if not n_posts.isdigit():
        logger.error("Please give a number. Exiting")
        sys.exit(1)

    try:
        access_token = conf["access_token"]
        page_id = conf["page_id"]
        # Looked up here rather than after the download so that a missing
        # key is caught by the same handler as the other settings.
        data_dir_path = os.path.join(page_id, conf["data_dir_name"])
    except KeyError:
        logger.error(
            "Invalid configuration file. Please check template and retry")
        sys.exit(1)

    try:
        graph = facebook.GraphAPI(access_token)
        logger.info("Graph API connected")
        profile = graph.get_object(page_id)
    except facebook.GraphAPIError as e:
        logger.error("Could not log in. {}".format(e))
        sys.exit(1)

    # Collect the comments of every returned post.
    local_start = time.time()
    posts = graph.get_connections(profile["id"], "posts", limit=n_posts)
    comments = []
    for post in posts["data"]:
        url_post = "https://www.facebook.com/posts/{}".format(post["id"])
        logger.info("Getting data for post {}".format(url_post))
        post_comments = get_comments(get_post_data(access_token, post["id"]))
        if len(post_comments) == 0:
            # The embedded newlines and indentation are part of the logged
            # message and are spelled out explicitly here.
            logger.warning(
                "Apparently, there are no comments at the selected post\n"
                "                Check the actual post on its Facebook page \n"
                "                https://www.facebook.com/posts/{}".format(
                    post["id"]))
        comments.extend(post_comments)

    if not comments:
        logger.error("Could not get any comments. Exiting gracefully")
        sys.exit(1)
    if len(comments) < 100:
        logger.warning(
            "Found {} comment(s). Not enough data "
            "to make much sense. Plots will be made regardless".format(
                len(comments)))
    else:
        logger.info("Got {} comments from {} post(s) in {} seconds".format(
            len(comments), len(posts["data"]),
            round((time.time() - local_start), 1)))

    create_nonexistent_dir(data_dir_path)
    data_filepath = os.path.join(data_dir_path,
                                 "{}_comments.tsv".format(len(comments)))
    # Each comment is paired with a 0 in the "sentiment" column.
    data = zip(comments, [0] * len(comments))
    data_to_tsv(data, ["comment", "sentiment"], data_filepath)
    logger.info("Saved {} comments in {} ".format(len(comments),
                                                  data_filepath))
    logger.info("\a\a\aDIN DONE! in {} seconds".format(
        round((time.time() - start), 1)))
Beispiel #18
0
    def post(self):
        """
        HTTP POST request that stores a list of bibcodes as a vault query
        and answers with a URL pointing at the stored query

        There are two simple steps:
            1. Send the posted list as a bigquery to the URL configured in
               VAULT_QUERY_URL
            2. Build a URL under BUMBLEBEE_URL that carries the query id
               returned by that request

        Post data
        ---------
        a list of unicode strings (submitted as the bibcodes)

        Return data (on success)
        ------------------------
        redirect: <string>

        HTTP Responses:
        --------------
        Succeed: 200
        Post data is not a list, or holds a non-unicode item: 400
        Non-200 answer from vault/query: its text, status code and headers
        are handed back unchanged
        """

        # Setup the data
        current_app.logger.info(
            'Received data, headers: {}'.format(request.headers))
        bibcodes = get_post_data(request)

        if not isinstance(bibcodes, list):
            current_app.logger.error(
                'User passed incorrect format: {}, {}'.format(
                    type(bibcodes), bibcodes))
            abort(400)
        elif any(not isinstance(entry, unicode) for entry in bibcodes):
            current_app.logger.error(
                'List contains non-unicode characters: {}'.format(bibcodes))
            abort(400)

        # The bibcodes travel as one newline-separated block preceded by a
        # "bibcode" header line
        payload = {
            'bigquery': ['bibcode\n' + '\n'.join(bibcodes)],
            'q': ['*:*'],
            'fq': ['{!bitset}'],
        }

        # POST the query
        # https://api.adsabs.harvard.edu/v1/vault/query
        current_app.logger.info('Contacting vault/query')
        vault_client = client()
        response = vault_client.post(current_app.config['VAULT_QUERY_URL'],
                                     data=payload)

        if response.status_code != 200:
            current_app.logger.warning(
                'vault/query returned non-200 exit status: {}'.format(
                    response.text))
            return response.text, response.status_code, \
                response.headers.items()

        # Get back a query id
        vault_reply = response.json()
        current_app.logger.info(
            'vault/query returned: {}'.format(vault_reply))
        query_id = vault_reply['qid']

        # Formulate the url based on the query id
        url_template = '{BBB_URL}/#search/q=*%3A*&__qid={query_id}'
        redirect_url = url_template.format(
            BBB_URL=current_app.config['BUMBLEBEE_URL'], query_id=query_id)
        current_app.logger.info('Returning redirect: {}'.format(redirect_url))

        # Return the URL to the user
        return {'redirect': redirect_url}, 200
Beispiel #19
0
def main():
    """Extract and count named entities in the comments of a single post.

    Steps, in order:
        1. Parse ``-c/--conf`` and load the configuration with
           ``load_config``.
        2. Ask for a language (``it`` or ``en``) and load the spaCy model
           whose name is stored in the configuration under that language.
        3. Prompt on standard input until a non-empty post ID is given.
        4. Fetch the post identified by ``<page_id>_<post_id>`` through
           ``get_post_data`` and extract its comments with ``get_comments``.
        5. Run ``get_entities`` on every comment, count the results with
           ``count_entities``, hand the counts to ``data_to_tsv`` and
           produce a bar plot.

    Output locations are derived from the configuration keys
    ``page_id``, ``data_dir_name``, ``data_entities_prefix``,
    ``plots_dir_name`` and ``barplot_filename``.

    Every failure path (unsupported language, model that cannot be loaded,
    missing configuration key, post without comments) logs an error and
    terminates the process with exit status 1, so that a calling shell or
    scheduler can tell a failed run from a successful one.
    """
    parser = argparse.ArgumentParser(
        description="Count comments in a given number of posts")
    parser.add_argument('-c',
                        '--conf',
                        type=str,
                        metavar='',
                        required=True,
                        help='Specify the path of the configuration file')
    config_path = parser.parse_args().conf

    # The overall timer starts once the command line has been parsed.
    start = time.time()
    logger = get_logger(__name__)
    logger.setLevel(logging.DEBUG)
    conf = load_config(config_path)

    supported_languages = ["it", "en"]
    lang = input("Insert language (it, en): ")
    if lang not in supported_languages:
        logger.error("Please provide a valid language. Supported: 'en', 'it'")
        sys.exit(1)

    # The model name is looked up in the configuration under the language
    # code; spacy.load signals a model it cannot load with OSError.
    try:
        nlp = spacy.load(conf.get(lang))
    except OSError:
        logger.error(
            "Could not find model in conf file. Please double check")
        sys.exit(1)

    # Keep asking until the user types something.
    post_id = ""
    while not post_id:
        post_id = input("Provide post ID: ")

    # Resolve every configuration value up front so that a missing key is
    # reported before any network call is made.
    try:
        access_token = conf["access_token"]
        page_id = conf["page_id"]
        n_top_entities = conf["n_top_entities"]
        data_dir_path = os.path.join(page_id, conf["data_dir_name"])
        # NOTE(review): the file is named ".csv" although it is written by
        # data_to_tsv below -- confirm the extension is intended.
        data_filename = "{}_{}.csv".format(conf["data_entities_prefix"],
                                           post_id)
        plots_dir_path = os.path.join(page_id, conf["plots_dir_name"])
        barplot_filepath = os.path.join(
            plots_dir_path,
            "{}_{}_ner.png".format(conf["barplot_filename"], post_id))
    except KeyError:
        logger.error(
            "Invalid configuration file. Please check template and retry")
        sys.exit(1)

    # The post is requested with the page id prefixed to the post id.
    actual_post_id = page_id + "_" + post_id
    url_post = "https://www.facebook.com/posts/{}".format(actual_post_id)
    logger.info("Getting data for post {}".format(url_post))
    local_start = time.time()
    comments = get_comments(get_post_data(access_token, actual_post_id))

    if len(comments) == 0:
        # The embedded newlines and indentation are part of the logged
        # message and are spelled out explicitly here.
        logger.error(
            "Apparently, there are no comments at the selected post\n"
            "            Check the actual post on its Facebook page \n"
            "            https://www.facebook.com/{}/posts/{}".format(
                page_id, post_id))
        sys.exit(1)
    if len(comments) < 100:
        logger.warning(
            "Got {} comments. Not enough data "
            "to make much sense. Plots will be made regardless".format(
                len(comments)))
    else:
        logger.info("Got {} comments in {} seconds".format(
            len(comments), round((time.time() - local_start), 2)))

    # Flatten the entities found in each comment into a single list.
    local_start = time.time()
    entities = [
        ent for comment in comments for ent in get_entities(nlp, comment)
    ]
    logger.info(
        "Extracted {} entities out of {} comments in {} seconds".format(
            len(entities), len(comments),
            round((time.time() - local_start), 2)))
    entities_data = count_entities(entities)

    create_nonexistent_dir(data_dir_path)
    data_filepath = os.path.join(data_dir_path, data_filename)
    data_to_tsv(entities_data, ["entities", "count"], data_filepath)
    logger.info("Saved {} unique entities and their counts in {} ".format(
        len(entities_data), data_filepath))

    create_nonexistent_dir(plots_dir_path)
    save_barplot(entities_data, ["Entities", "Counts"], n_top_entities,
                 barplot_filepath)
    logger.info("Bar plot saved at {}".format(barplot_filepath))
    logger.info("\a\a\aDIN DONE! in {} seconds".format(
        round((time.time() - start), 1)))
Beispiel #20
0
def get_all_page_data(url, is_community=False):
    """Scrape the posts reachable from a page and dump each one to JSON.

    The page at ``url`` is opened in a browser driver and scrolled with
    ``scroll()`` using the module-level ``cutoff_date``. The links of the
    posts found are de-duplicated (keeping first-seen order, so the numbering
    of the output files is reproducible) and saved to
    ``./data/<name>/post_links.json``. Every link accepted by
    ``is_string_url`` is then opened in a fresh driver and the result of
    ``get_post_data`` is written to ``page_post_<i>.json`` in the same
    directory, where ``i`` is the position of the link in the saved list.

    When called for the page itself (``is_community`` false) the function
    finally calls itself once more for the ``/community`` section.

    Each driver is closed in a ``finally`` clause, so a failure while
    scraping does not leave a browser process behind.

    Relies on the module-level names ``args`` (attributes ``chrome`` and
    ``windows``) and ``cutoff_date``.

    :param url: address of the page; its last non-empty ``/``-separated
        segment is used as the output directory name
    :param is_community: when true, scrape ``<url>/community`` into a
        ``community`` sub-directory with ``is_headless=False`` and do not
        recurse
    """
    segments = url.split("/")
    # A trailing slash leaves an empty last segment; fall back to the one
    # before it.
    name = segments[-1] or segments[-2]

    if is_community:
        name = os.path.join(name, "community")
        url = url + "/community"

    # makedirs creates the missing parents too, which matters for the nested
    # "<name>/community" directory.
    page_data_path = os.path.join(".", "data", name)
    os.makedirs(page_data_path, exist_ok=True)

    # The page itself is scraped headless, the community section is not.
    driver = initialize_driver(args.chrome,
                               args.windows,
                               is_headless=not is_community)
    try:
        driver.get(url)

        page_name = get_text(driver, './/a[@class="_64-f"]')

        print(f"Scrolling {url} until {cutoff_date}")
        scroll(driver, pd.to_datetime(cutoff_date))

        posts = driver.find_elements_by_xpath(
            '//div[contains(@class, "userContentWrapper")]')

        # dict.fromkeys drops duplicates while preserving first-seen order.
        post_links = list(
            dict.fromkeys(get_post_links(post) for post in tqdm(posts)))

        with open(os.path.join(page_data_path, 'post_links.json'), 'w') as f:
            json.dump(post_links, f)
    finally:
        driver.quit()

    print(f"Now scraping {len(post_links)} posts from {name}")

    # Checked in this order; the first path fragment found in the link wins.
    known_post_types = ("videos", "photos", "posts", "notes")

    for i, post_link in enumerate(post_links):

        if not is_string_url(post_link):
            continue

        print(f"Scraping {post_link}")

        post_type = next(
            (kind for kind in known_post_types if f"/{kind}/" in post_link),
            "other")

        # Notes are located through a different container class than every
        # other post type.
        container_class = ("fb_content"
                           if post_type == "notes" else "userContentWrapper")

        driver = initialize_driver(args.chrome, args.windows)
        try:
            driver.get(post_link)

            post_element = driver.find_element_by_xpath(
                f'.//div[contains(@class, "{container_class}")]')

            post_data = get_post_data(driver, post_element, post_type)
            post_data["page_name"] = page_name

            with open(os.path.join(page_data_path, f'page_post_{i}.json'),
                      'w') as f:
                json.dump(post_data, f)
        finally:
            driver.quit()

    if not is_community:
        get_all_page_data(url, is_community=True)
Beispiel #21
0
    def post(self):
        """
        HTTP POST request that turns a posted list of bibcodes into a URL
        for a stored query

        The request body must be a list whose items are all unicode strings.
        The list is sent as a bigquery to the endpoint configured in
        VAULT_QUERY_URL, and the query id found in the answer is embedded in
        a URL built from BUMBLEBEE_URL.

        Return data (on success)
        ------------------------
        redirect: <string>

        HTTP Responses:
        --------------
        Succeed: 200
        Body is not a list, or an item is not unicode: 400
        vault/query answered with a non-200 status: the text, status code
        and headers of that answer are returned as they are
        """

        log = current_app.logger

        # Setup the data
        log.info('Received data, headers: {}'.format(request.headers))
        posted = get_post_data(request)

        if not isinstance(posted, list):
            message = 'User passed incorrect format: {}, {}'.format(
                type(posted), posted)
            log.error(message)
            abort(400)
        elif not all(isinstance(item, unicode) for item in posted):
            message = 'List contains non-unicode characters: {}'.format(posted)
            log.error(message)
            abort(400)

        # A "bibcode" header line followed by one item per line
        bigquery_body = 'bibcode\n' + '\n'.join(posted)
        form = {'bigquery': [bigquery_body], 'q': ['*:*'], 'fq': ['{!bitset}']}

        # POST the query
        # https://api.adsabs.harvard.edu/v1/vault/query
        log.info('Contacting vault/query')
        reply = client().post(current_app.config['VAULT_QUERY_URL'], data=form)

        if reply.status_code != 200:
            log.warning(
                'vault/query returned non-200 exit status: {}'.format(
                    reply.text))
            return reply.text, reply.status_code, reply.headers.items()

        # Get back a query id
        decoded = reply.json()
        log.info('vault/query returned: {}'.format(decoded))
        qid = decoded['qid']

        # Formulate the url based on the query id
        bumblebee = current_app.config['BUMBLEBEE_URL']
        redirect_url = '{BBB_URL}/#search/q=*%3A*&__qid={query_id}'.format(
            BBB_URL=bumblebee, query_id=qid)
        log.info('Returning redirect: {}'.format(redirect_url))

        # Return the URL to the user
        return {'redirect': redirect_url}, 200