Example #1
0
    def init(cls, session, force_drop,
             ignore_inactive=False,
             force_inactive=False,
             ignore_redirected=False):
        """Initialize the database: (re)create tables, seed platform rows,
        then load claim/fact-checking domains and site definitions from
        files under HOAXY_HOME, and report which sites were added/updated.

        Parameters
        ----------
        session : SQLAlchemy session
            Session used for all inserts and the final report query.
        force_drop : bool
            When True, drop all existing tables before recreating them.
        ignore_inactive : bool
            Forwarded to the SiteCmd loaders; skip inactive domains.
        force_inactive : bool
            Forwarded to the SiteCmd loaders; load inactive domains anyway.
        ignore_redirected : bool
            Forwarded to the SiteCmd loaders; skip redirected domains.
        """
        configure_logging('init',
                          console_level='INFO',
                          file_level='WARNING')
        # Timestamp taken before any write so the closing query can list
        # exactly the sites created or updated by this run.
        dt_before = datetime.utcnow()
        # NOTE(review): this method logs via both the root `logging` module
        # and the module-level `logger` (bottom of the method) — presumably
        # intentional after configure_logging(); confirm.
        logging.info('Creating database tables:')
        if force_drop is True:
            logging.warning('Existed tables would be dropped and recreated!')
            Base.metadata.drop_all(ENGINE)
        else:
            logging.warning('Ignore existed tables')
        Base.metadata.create_all(ENGINE)
        logging.info('Inserting platforms if not exist')
        # Seed the two built-in platforms, keyed by unique 'name'.
        get_or_create_m(session, Platform, TWITTER_PLATFORM_DICT, fb_uk='name')
        get_or_create_m(session, Platform, WEB_PLATFORM_DICT, fb_uk='name')
        logging.info('Trying to load site data:')
        # Optional data files; each is loaded only if present.
        dc_file = join(HOAXY_HOME, 'domains_claim.txt')
        df_file = join(HOAXY_HOME, 'domains_factchecking.txt')
        site_file = join(HOAXY_HOME, 'sites.yaml')
        if isfile(dc_file) is True:
            logging.info('Claim domains %s found', dc_file)
            SiteCmd.load_domains(session, dc_file, site_type='claim',
                                 ignore_inactive=ignore_inactive,
                                 force_inactive=force_inactive,
                                 ignore_redirected=ignore_redirected)
        else:
            logging.info('Claim domains %s not found', dc_file)
        if isfile(df_file) is True:
            logging.info('Fact checking domains %s found', df_file)
            SiteCmd.load_domains(session, df_file, site_type='fact_checking',
                                 ignore_inactive=ignore_inactive,
                                 force_inactive=force_inactive,
                                 ignore_redirected=ignore_redirected)
        else:
            logging.info('Fact checking domains %s not found', df_file)

        if isfile(site_file) is True:
            logging.info('Site file %s found', site_file)
            SiteCmd.load_sites(session, site_file,
                               ignore_inactive=ignore_inactive,
                               force_inactive=force_inactive,
                               ignore_redirected=ignore_redirected)
        else:
            logging.info('Site file %s not found', site_file)
        # Report every site touched since dt_before (created or updated).
        sites = session.query(Site.domain, Site.site_type, Site.base_url
                              ).filter(or_(
                                  Site.created_at > dt_before,
                                  Site.updated_at > dt_before
                              )).order_by(Site.id).all()
        logger.info("Added or updated sites are:\n %s", pprint.pformat(sites))
        logger.info("Done.")
Example #2
0
    def load_domains(cls,
                     session,
                     fn,
                     site_type,
                     ignore_inactive=False,
                     force_inactive=False,
                     ignore_redirected=False,
                     exclusive=False):
        """Load site domains from a plain-text file into the database.

        Parameters
        ----------
        session : SQLAlchemy session used for all database operations.
        fn : str
            Path of the domain file; one domain per line, lines judged as
            comments by ``is_comment_line`` are skipped.
        site_type : str
            Site type assigned to every domain in the file.
        ignore_inactive : bool
            Skip inactive domains instead of aborting.
        force_inactive : bool
            Load inactive domains anyway instead of aborting.
        ignore_redirected : bool
            Skip redirected domains instead of aborting.
        exclusive : bool
            When True, disable all existing sites of ``site_type`` first.

        Raises
        ------
        SystemExit
            Exit code 2 when invalid/inactive/redirected domains are found
            and the corresponding override flag is not set.
        """
        if exclusive:
            # disable existing domains of the same site type
            ob_expr = Site.id.asc()
            msites = get_msites(session, fb_kw=None, ob_expr=ob_expr)
            for site in msites:
                if site.site_type == site_type:
                    cls.disable_site(session, site)
        logger.info('Sending HTTP requests to infer base URLs ...')
        # Each tuple is (1-based line number, raw line, site dict, status)
        # where (site, status) come from parse_domain.
        with open(fn, 'r') as f:
            site_tuples = [(n + 1, line) + parse_domain(line, site_type)
                           for n, line in enumerate(f)
                           if not is_comment_line(line)]
        # First pass: report every problem and record which kinds occurred.
        invalid_flag = False
        inactive_flag = False
        redirected_flag = False
        for n, line, site, status in site_tuples:
            line = line.strip('\n')
            if status == 'invalid':
                invalid_flag = True
                logger.error('line %i %r, invalid domain', n, line)
            elif status == 'inactive':
                inactive_flag = True
                logger.warning('line %i %r, domain inactive!', n, line)
            elif status == 'redirected':
                redirected_flag = True
                logger.warning('line %i %r, domain redirected to %s!', n, line,
                               site['base_url'])
        # Abort unless every detected problem is explicitly overridden:
        # invalid domains always abort; inactive domains need either
        # --ignore-inactive or --force-inactive; redirected domains need
        # --ignore-redirected.
        if invalid_flag is True or \
                (inactive_flag is True and (ignore_inactive is False and
                                            force_inactive is False)) or \
                (redirected_flag is True and ignore_redirected is False):
            logger.error("""Please fix the warnings or errors above! \
Edit domains, or use --ignore-redirected to handle redirected domains', \
or Use --ignore-inactive or --force-inactive  to handle inactive domains""")
            raise SystemExit(2)
        # Second pass: upsert every domain not excluded by the ignore flags.
        for n, line, site, status in site_tuples:
            if status == 'inactive' and ignore_inactive is True:
                continue
            elif status == 'redirected' and ignore_redirected is True:
                continue
            else:
                site['is_enabled'] = True
                get_or_create_m(
                    session, Site, site, fb_uk='domain', onduplicate='update')
                logger.debug('Insert or update site %s', site['domain'])
Example #3
0
 def add_site_tags(cls, session, msite, source, tags):
     """Attach each tag in ``tags`` (with ``source``) to ``msite``.

     Tags the site already owns are reported with a warning and skipped;
     new ones are fetched or created, linked, and committed one by one.
     """
     existing = {(mt.name, mt.source) for mt in msite.site_tags}
     unique_keys = ['name', 'source']
     for name in tags:
         payload = dict(name=name, source=source)
         if (name, source) in existing:
             logger.warning('Site %r already contains tag %r!', msite.name,
                            payload)
             continue
         mtag = get_or_create_m(session, SiteTag, payload, fb_uk=unique_keys)
         msite.site_tags.append(mtag)
         logger.info('Added tag %r to Site %r', payload, msite.name)
         session.commit()
Example #4
0
 def run(cls, args):
     """Entry point of the lucene command: validate CLI args, then either
     build/update the lucene index from articles or run a search query.

     Parameters
     ----------
     args : dict
         docopt-style argument dict; validated by ``cls.args_schema``.
     """
     try:
         # print(args)
         args = cls.args_schema.validate(args)
     except SchemaError as e:
         # Invalid CLI arguments: exit with the schema error as message.
         sys.exit(e)
     session = Session()
     # make sure lucene be inited
     lucene.initVM()
     lucene.getVMEnv().attachCurrentThread()
     if args['--index'] is True:
         configure_logging(
             'lucene.index', console_level=args['--console-log-level'])
         # Persistent cursor: the last article.group_id already indexed,
         # stored as a MetaInfo row so indexing can resume incrementally.
         mgid = get_or_create_m(
             session,
             MetaInfo,
             data=dict(
                 name='article_group_id_lucene_index',
                 value='0',
                 value_type='int',
                 description='article.group_id used for lucene index'),
             fb_uk='name')
         if args['--mode'] == 'create':
             # Full rebuild: reset the cursor so all articles are re-read.
             mgid.set_value(0)
             session.commit()
         logger.debug('Indexing started.. Getting articles..')
         # One article per group_id (DISTINCT ON), only enabled sites,
         # only groups newer than the stored cursor.
         q = """
         SELECT DISTINCT ON (a.group_id) a.id, a.group_id,
             a.canonical_url,
             a.title, a.meta, a.content,
             coalesce(a.date_published, a.date_captured) AS pd,
             s.domain, s.site_type
         FROM article AS a
             JOIN site AS s ON s.id=a.site_id
         WHERE a.site_id IS NOT NULL AND s.is_enabled IS TRUE
             AND a.group_id>:gid
         ORDER BY group_id, pd ASC
         """
         articles_iter = session.execute(
             sqlalchemy.text(q).bindparams(gid=mgid.get_value()))
         cls.index(session, args['--mode'], articles_iter, mgid)
     elif args['--search'] is True:
         configure_logging(
             'lucene.search', console_level=args['--console-log-level'])
         cls.search(args['--query'], args['--top'])
     else:
         print("Unrecognized command!")
         sys.exit(2)
Example #5
0
 def replace_site_tags(cls, session, msite, source, tags):
     """Make ``tags`` (with ``source``) the exact site_tag set of ``msite``.

     Missing tags are fetched/created and linked; tags not in ``tags``
     are deleted; a single commit finalizes the replacement.
     """
     desired = [(name, source) for name in tags]
     current = [(mt.name, mt.source) for mt in msite.site_tags]
     # Link every requested tag the site does not own yet.
     for name in tags:
         if (name, source) not in current:
             payload = dict(name=name, source=source)
             mtag = get_or_create_m(session, SiteTag, payload,
                                    fb_uk=['name', 'source'])
             msite.site_tags.append(mtag)
     # Remove every owned tag that is no longer requested.
     for mt in msite.site_tags:
         if (mt.name, mt.source) not in desired:
             session.delete(mt)
     session.commit()
     logger.info('Replace site tags for site %r from %r to %r', msite.name,
                 current, desired)
Example #6
0
    def parse(self, jd):
        """The main parse function.

        Parameters
        ----------
        jd : json
            Tweet json data.

        Procedures
        ----------
        1) validate `jd`
        2) extract URL and hashtag from `jd`
        3) insert into database

        Returns ``None`` both on success and on every early exit
        (invalid tweet, no URLs, duplicate tweet).
        """
        logger.debug('Parsing one tweet, begin')
        #
        # validation
        #
        # A tweet without id, created_at or user.id is unusable.
        try:
            tw_raw_id = jd['id']
            created_at = utc_from_str(jd['created_at'])
            user_raw_id = jd['user']['id']
        except KeyError as e:
            logger.error('Invalid tweet: %s', e)
            return None
        #
        # extract url, hashtag and associated tweet status id
        #
        # Entities are collected from the tweet itself plus any quoted
        # and/or retweeted status (including a quote inside a retweet).
        urls_set = set()
        hashtags_set = set()
        entities_list = []
        if 'entities' in jd:
            entities_list.append(jd['entities'])
        if 'quoted_status' in jd:
            q_jd = jd['quoted_status']
            if 'entities' in q_jd:
                entities_list.append(q_jd['entities'])
        if 'retweeted_status' in jd:
            re_jd = jd['retweeted_status']
            if 'entities' in re_jd:
                entities_list.append(re_jd['entities'])
            if 'quoted_status' in re_jd and\
                    'entities' in re_jd['quoted_status']:
                entities_list.append(re_jd['quoted_status']['entities'])
        for entities in entities_list:
            if entities:
                self._parse_entities(entities, urls_set, hashtags_set)
        # This tweet should contain urls
        if len(urls_set) == 0 and self.save_none_url_tweet is False:
            logger.debug('No url found in %s, ignore!', tw_raw_id)
            return None
        #
        # Insert into database
        #
        # creating user
        logger.debug('creating user')
        muser = get_or_create_m(self.session,
                                TwitterUser,
                                data=dict(raw_id=user_raw_id),
                                fb_uk='raw_id')
        # creating tweet
        logger.debug('creating tweet')
        mtweet = Tweet(raw_id=tw_raw_id,
                       json_data=jd,
                       created_at=created_at,
                       user_id=muser.id)
        self.session.add(mtweet)
        try:
            self.session.commit()
            logger.debug('Inserted tweet %r', tw_raw_id)
        except IntegrityError as e:
            # Duplicate raw_id: tweet already stored, nothing more to do.
            logger.warning('Tweet %s existed in db: %s', tw_raw_id, e)
            self.session.rollback()
            return
        # creating urls
        # Each URL and its tweet association is committed individually so
        # one conflict does not abort the rest.
        logger.debug('creating urls')
        for url in urls_set:
            murl = get_or_create_murl(self.session,
                                      data=dict(raw=url),
                                      platform_id=self.platform_id)
            self.session.add(AssTweetUrl(tweet_id=mtweet.id, url_id=murl.id))
            try:
                self.session.commit()
            except IntegrityError as e:
                logger.error('ass_tweet_url IntegrityError, see: %s', e)
                self.session.rollback()
        # creating hashtags
        logger.debug('creating hashtags')
        for hashtag in hashtags_set:
            mhashtag = get_or_create_m(self.session,
                                       Hashtag,
                                       data=dict(text=hashtag),
                                       fb_uk='text')
            self.session.add(
                AssTweetHashtag(tweet_id=mtweet.id, hashtag_id=mhashtag.id))
            try:
                self.session.commit()
            except IntegrityError as e:
                logger.error('ass_tweet_hashtag IntegrityError, see: %s', e)
                self.session.rollback()
        # paring associate tweet
        # ass_tweet is derived in SQL from the stored json_data so the
        # referenced status ids do not have to be re-parsed in Python.
        q1 = """
        INSERT INTO ass_tweet (id, retweeted_status_id, quoted_status_id,
            in_reply_to_status_id)
        SELECT id,
            CAST(json_data#>>'{retweeted_status, id}' AS BIGINT),
            CAST(json_data#>>'{quoted_status, id}' AS BIGINT),
            CAST(json_data->>'in_reply_to_status_id' AS BIGINT)
        FROM tweet
        WHERE id=:tweet_id
        """
        q1 = text(q1).bindparams(tweet_id=mtweet.id)
        try:
            self.session.execute(q1)
            self.session.commit()
        except DataError as e:
            # Handle \u0000 exception that postgresql json do not support:
            # escape the null byte inside the stored json text, then retry.
            logger.warning(e)
            self.session.rollback()
            q2 = r"""
            UPDATE tweet SET json_data=regexp_replace(
                        json_data::text, '\\u0000', '\\\\u0000', 'g')::json
            WHERE id=:tweet_id
            """
            q2 = text(q2).bindparams(tweet_id=mtweet.id)
            self.session.execute(q2)
            self.session.commit()
            logger.warning('json_data is updated (\\u0000 to \\\\u0000)')
            self.session.execute(q1)
            self.session.commit()
        logger.debug('Parsing one tweet, done.')
Example #7
0
 def parse_new_one(self, jd, session, g_urls_map, g_uusers_set,
                   g_edges_set):
     """Parse one new tweet and persist its level-1 entities.

     Parameters
     ----------
     jd : dict
         Tweet json data.
     session : SQLAlchemy session used for all inserts.
     g_urls_map : dict
         Shared raw-url -> url_id map, filled in by this call.
     g_uusers_set, g_edges_set :
         Shared accumulators forwarded to ``self._parse_l2``.

     Returns ``None`` on every early exit (invalid tweet, no URLs,
     duplicate tweet) as well as on success.
     """
     # validate jd
     jd = replace_null_byte(jd)
     try:
         tw_raw_id = jd['id']
         created_at = utc_from_str(jd['created_at'])
         user_raw_id = jd['user']['id']
     except KeyError as e:
         logger.error('Invalid tweet: %s', e)
         return None
     # parsing, level 1
     l_urls, l_mentions, l_hashtags = self._parse_l1(jd)
     if len(l_urls['union']) == 0 and self.save_none_url_tweet is False:
         logger.warning('Ignore tweet %r with no urls!', tw_raw_id)
         return None
     # saving, level 1
     logger.debug('Saving this user ...')
     muser = get_or_create_m(session,
                             TwitterUser,
                             data=dict(raw_id=user_raw_id),
                             fb_uk='raw_id')
     logger.debug('Saving this tweet ...')
     muser_id = muser.id
     mtweet = Tweet(raw_id=tw_raw_id,
                    json_data=jd,
                    created_at=created_at,
                    user_id=muser_id)
     session.add(mtweet)
     try:
         session.commit()
         logger.debug('Inserted tweet %r', tw_raw_id)
     except IntegrityError as e:
         # Duplicate raw_id: tweet already in db, stop here.
         logger.warning('Tweet %s existed in db: %s', tw_raw_id, e)
         session.rollback()
         return None
     mtweet_id = mtweet.id
     logger.debug('Saving AssTweet ...')
     # Referenced status ids; None when the tweet is not a retweet/quote.
     retweeted_status_id = None
     quoted_status_id = None
     if 'quoted_status' in jd:
         quoted_status_id = jd['quoted_status']['id']
     if 'retweeted_status' in jd:
         retweeted_status_id = jd['retweeted_status']['id']
     in_reply_to_status_id = jd['in_reply_to_status_id']
     session.add(
         AssTweet(id=mtweet_id,
                  retweeted_status_id=retweeted_status_id,
                  quoted_status_id=quoted_status_id,
                  in_reply_to_status_id=in_reply_to_status_id))
     try:
         session.commit()
     except IntegrityError as e:
         session.rollback()
     logger.debug('Saving urls ...')
     for u in l_urls['union']:
         if len(u) > MAX_URL_LEN:
             # Overlong URL: map to the sentinel -1 and do not persist it.
             murl_id = -1
         else:
             murl_id = get_or_create_murl(session,
                                          data=dict(raw=u),
                                          platform_id=self.platform_id).id
             # Saving AssTweetUrl
             session.add(AssTweetUrl(tweet_id=mtweet_id, url_id=murl_id))
             try:
                 session.commit()
             except IntegrityError as e:
                 logger.error('ass_tweet_url IntegrityError, see: %s', e)
                 session.rollback()
         g_urls_map[u] = murl_id
     # creating hashtags
     logger.debug('creating hashtags ...')
     for hashtag in l_hashtags['union']:
         mhashtag = get_or_create_m(session,
                                    Hashtag,
                                    data=dict(text=hashtag),
                                    fb_uk='text')
         # NOTE(review): uses mtweet.id here while the rest of the method
         # uses the cached mtweet_id — same value, just inconsistent.
         session.add(
             AssTweetHashtag(tweet_id=mtweet.id, hashtag_id=mhashtag.id))
         try:
             session.commit()
         except IntegrityError as e:
             logger.error('ass_tweet_hashtag IntegrityError, see: %s', e)
             session.rollback()
     self._parse_l2(jd, l_urls, l_mentions, g_urls_map, g_uusers_set,
                    g_edges_set)
Example #8
0
    def parse(self, jd):
        """The main parse function.

        Parameters
        ----------
        jd : json
            Tweet json data.

        Procedures
        ----------
        1) do roughly parsing to validate `jd`
        2) carefully parsing and insert into database
        3) other associations

        Returns ``None`` on every early exit (invalid tweet, no URLs,
        duplicate tweet) as well as on success.
        """
        logger.debug('Parsing one tweet, begin ...')
        #
        # 1) do roughly parsing to validate the tweet
        #
        # 1-1) parsing necessary fields, if failed then it is not a valid tweet
        logger.debug('Replacing null byte if existing ...')
        jd = replace_null_byte(jd, self.fp)
        logger.debug('1) Roughly parsing ...')
        try:
            tw_raw_id = jd['id']
            created_at = utc_from_str(jd['created_at'])
            user_raw_id = jd['user']['id']
        except KeyError as e:
            logger.error('Invalid tweet: %s', e)
            return None
        # 1-2) roughly parsing
        # Collect entities from the tweet and any quoted/retweeted status;
        # *_user_jd / *_status_id are only bound inside their branches and
        # are used later only when the matching *_status_id is not None.
        entities_list = []
        quoted_status_id = None
        retweeted_status_id = None
        if 'entities' in jd:
            entities_list.append(jd['entities'])
        if 'quoted_status' in jd:
            quoted_jd = jd['quoted_status']
            quoted_user_jd = jd['quoted_status']['user']
            quoted_status_id = quoted_jd['id']
            if 'entities' in quoted_jd:
                entities_list.append(quoted_jd['entities'])
        if 'retweeted_status' in jd:
            retweeted_jd = jd['retweeted_status']
            retweeted_user_jd = jd['retweeted_status']['user']
            retweeted_status_id = retweeted_jd['id']
            if 'entities' in retweeted_jd:
                entities_list.append(retweeted_jd['entities'])
        in_reply_to_status_id = jd['in_reply_to_status_id']
        in_reply_to_user_id = jd['in_reply_to_user_id']
        in_reply_to_screen_name = jd['in_reply_to_screen_name']

        urls_set = set()
        hashtags_set = set()
        mentions_set = set()
        for entities in entities_list:
            if entities:
                self._parse_entities(entities, urls_set, hashtags_set,
                                     mentions_set)
        # This tweet should contain urls
        if len(urls_set) == 0 and self.save_none_url_tweet is False:
            logger.warning('No url found in tweet %s, ignore!', tw_raw_id)
            return None
        #
        # 2) carefully parsing and saving into database
        #
        logger.debug('2) Carefully parsing and saving ...')
        logger.debug('2-0) Saving twitter_user raw_id=%s ...', user_raw_id)
        muser = get_or_create_m(self.session,
                                TwitterUser,
                                data=dict(raw_id=user_raw_id),
                                fb_uk='raw_id')
        logger.debug('Saving this user into twitter_user_union as well ...')
        create_or_update_muser(
            self.session,
            data=dict(raw_id=user_raw_id,
                      screen_name=jd['user']['screen_name'],
                      followers_count=jd['user']['followers_count'],
                      profile=jd['user'],
                      updated_at=created_at))
        # creating tweet
        logger.debug('2-0) Saving tweet raw_id=%s ...', tw_raw_id)
        if self.saved_tweet is True:
            # Tweet row already exists (re-parsing mode); fetch it.
            mtweet = self.session.query(Tweet).filter_by(
                raw_id=tw_raw_id).one()
        else:
            mtweet = Tweet(raw_id=tw_raw_id,
                           json_data=jd,
                           created_at=created_at,
                           user_id=muser.id)
            self.session.add(mtweet)
            try:
                self.session.commit()
                logger.debug('Inserted tweet %r', tw_raw_id)
            except IntegrityError as e:
                logger.warning('Tweet %s existed in db: %s', tw_raw_id, e)
                self.session.rollback()
                return None
        tweet_id = mtweet.id
        # Saving all urls and mapping the saved id
        url_map = dict()
        logger.debug('2-0) Saving all urls and associating with tweet...')
        for url in urls_set:
            murl = get_or_create_murl(self.session,
                                      data=dict(raw=url),
                                      platform_id=self.platform_id)
            url_map[url] = murl.id
            # saving ass_tweet_url
            if self.saved_tweet is False:
                self.session.add(
                    AssTweetUrl(tweet_id=tweet_id, url_id=url_map[url]))
                try:
                    self.session.commit()
                except IntegrityError as e:
                    logger.error('ass_tweet_url IntegrityError, see: %s', e)
                    self.session.rollback()
        # 2-1) retweet, focusing on retweeted_status
        #               edge direction: from retweeted_user to current user
        if retweeted_status_id is not None:
            logger.debug(
                '2-1-a) Saving the retweeted user into twitter_user_union ...')
            retweeted_user_id = retweeted_user_jd['id']
            retweeted_screen_name = retweeted_user_jd['screen_name']
            create_or_update_muser(
                self.session,
                data=dict(raw_id=retweeted_user_id,
                          screen_name=retweeted_screen_name,
                          followers_count=retweeted_user_jd['followers_count'],
                          profile=retweeted_user_jd,
                          updated_at=created_at))
            # retweeted user has been saved above, should be removed from mentions
            try:
                mentions_set.remove((retweeted_user_id, retweeted_screen_name))
            except KeyError as e:
                logger.warning('Tweet %r: retweeted user not in mentions',
                               tw_raw_id)
            logger.debug('2-1-a) Saving edges for retweet ...')
            self._save_edges(url_map,
                             retweeted_jd['entities'],
                             tweet_id,
                             tw_raw_id,
                             from_raw_id=retweeted_user_id,
                             to_raw_id=user_raw_id,
                             is_quoted_url=False,
                             is_mention=False,
                             tweet_type='retweet')
        # 2-2) reply, focusing on current status
        #             edges direction: from current user to mentions
        if in_reply_to_status_id is not None:
            # mentioned users would be saved later
            logger.debug('2-1-b) Saving edges for reply ...')
            # in_reply_to_user
            self._save_edges(url_map,
                             jd['entities'],
                             tweet_id,
                             tw_raw_id,
                             from_raw_id=user_raw_id,
                             to_raw_id=in_reply_to_user_id,
                             is_quoted_url=False,
                             is_mention=False,
                             tweet_type='reply')
            # mentions
            for m in jd['entities']['user_mentions']:
                to_raw_id = m.get('id')
                if to_raw_id and to_raw_id != in_reply_to_user_id:
                    self._save_edges(url_map,
                                     jd['entities'],
                                     tweet_id,
                                     tw_raw_id,
                                     from_raw_id=user_raw_id,
                                     to_raw_id=to_raw_id,
                                     is_quoted_url=False,
                                     is_mention=True,
                                     tweet_type='reply')
        # 2-3) quote
        if quoted_status_id is not None:
            logger.debug(
                '2-1-c) Saving the quoted user into twitter_user_union ...')
            quoted_user_id = quoted_user_jd['id']
            quoted_screen_name = quoted_user_jd['screen_name']
            create_or_update_muser(
                self.session,
                data=dict(raw_id=quoted_user_id,
                          screen_name=quoted_screen_name,
                          followers_count=quoted_user_jd['followers_count'],
                          profile=quoted_user_jd,
                          updated_at=created_at))
            # 2-3-1) retweeted quote, focusing on quoted_status
            #                         treated as retweet edge
            if retweeted_status_id is not None:
                logger.debug(
                    '2-1-c) Saving edges for quoting part of retweet ...')
                self._save_edges(url_map,
                                 quoted_jd['entities'],
                                 tweet_id,
                                 tw_raw_id,
                                 from_raw_id=retweeted_user_jd['id'],
                                 to_raw_id=user_raw_id,
                                 is_quoted_url=True,
                                 is_mention=False,
                                 tweet_type='retweet')
            # 2-3-2) replied quote, focusing on quoted_status
            #                       treated as reply edge
            elif in_reply_to_status_id is not None:
                logger.debug(
                    '2-1-c) Saving edges for quoting part of reply ...')
                # in_reply_to_user
                self._save_edges(url_map,
                                 quoted_jd['entities'],
                                 tweet_id,
                                 tw_raw_id,
                                 from_raw_id=user_raw_id,
                                 to_raw_id=in_reply_to_user_id,
                                 is_quoted_url=True,
                                 is_mention=False,
                                 tweet_type='reply')
                # mentions
                for m in jd['entities']['user_mentions']:
                    to_raw_id = m.get('id')
                    if to_raw_id and to_raw_id != in_reply_to_user_id:
                        self._save_edges(url_map,
                                         quoted_jd['entities'],
                                         tweet_id,
                                         tw_raw_id,
                                         from_raw_id=user_raw_id,
                                         to_raw_id=to_raw_id,
                                         is_quoted_url=True,
                                         is_mention=True,
                                         tweet_type='reply')
            # 2-3-3) pure quote
            else:
                logger.debug(
                    '2-1-c) Saving edge for pure quote part of quote ...')
                self._save_edges(url_map,
                                 quoted_jd['entities'],
                                 tweet_id,
                                 tw_raw_id,
                                 from_raw_id=quoted_user_jd['id'],
                                 to_raw_id=user_raw_id,
                                 is_quoted_url=True,
                                 is_mention=False,
                                 tweet_type='quote')
                logger.debug(
                    '2-1-c) Saving edges for original part of quote ...')
                for m in jd['entities']['user_mentions']:
                    to_raw_id = m.get('id')
                    if to_raw_id:
                        self._save_edges(url_map,
                                         jd['entities'],
                                         tweet_id,
                                         tw_raw_id,
                                         from_raw_id=user_raw_id,
                                         to_raw_id=to_raw_id,
                                         is_quoted_url=False,
                                         is_mention=True,
                                         tweet_type='quote')
        # 2-4) original tweet
        if retweeted_status_id is None and in_reply_to_status_id is None\
            and quoted_status_id is None and 'entities' in jd and\
            'user_mentions' in jd['entities']:
            logger.debug('2-1-d) Saving edges for original tweet ...')
            for m in jd['entities']['user_mentions']:
                to_raw_id = m.get('id')
                if to_raw_id:
                    self._save_edges(url_map,
                                     jd['entities'],
                                     tweet_id,
                                     tw_raw_id,
                                     from_raw_id=user_raw_id,
                                     to_raw_id=to_raw_id,
                                     is_quoted_url=False,
                                     is_mention=True,
                                     tweet_type='origin')
        # saving all mentions ...
        logger.debug('3) Saving all mentions ...')
        # add the in_reply_to_user
        # NOTE(review): for non-replies this adds (None, None) to the set;
        # presumably create_or_update_muser tolerates it — confirm.
        mentions_set.add((in_reply_to_user_id, in_reply_to_screen_name))
        # NOTE(review): this loop rebinds user_raw_id, shadowing the
        # tweet author's id; it is not read again afterwards, so harmless.
        for user_raw_id, screen_name in mentions_set:
            create_or_update_muser(self.session,
                                   data=dict(raw_id=user_raw_id,
                                             screen_name=screen_name,
                                             updated_at=created_at))
        # saving hashtags
        logger.debug('3) creating hashtags')
        if self.saved_tweet is False:
            for hashtag in hashtags_set:
                mhashtag = get_or_create_m(self.session,
                                           Hashtag,
                                           data=dict(text=hashtag),
                                           fb_uk='text')
                self.session.add(
                    AssTweetHashtag(tweet_id=tweet_id, hashtag_id=mhashtag.id))
                try:
                    self.session.commit()
                except IntegrityError as e:
                    logger.error('ass_tweet_hashtag IntegrityError, see: %s',
                                 e)
                    self.session.rollback()
        # saving associate tweet
        logger.debug('3 Saving ass_tweet ...')
        if self.saved_tweet is False:
            create_m(self.session,
                     AssTweet,
                     data=dict(id=tweet_id,
                               retweeted_status_id=retweeted_status_id,
                               quoted_status_id=quoted_status_id,
                               in_reply_to_status_id=in_reply_to_status_id))
        logger.debug('Parsing one tweet, done.')