Code example #1
 def __get_files(message):
     files = []
     for file in message:
         new_file = {
             'file_user_id': file['user_id'],
             'file_post_id': file['post_id'],
             'file_create_at': unixtime_to_datetime(file['create_at'] / 1000).isoformat(),
             'file_update_at': unixtime_to_datetime(file['update_at'] / 1000).isoformat(),
             'file_delete_at': None if file['delete_at'] == 0 else unixtime_to_datetime(
                 file['delete_at'] / 1000).isoformat(),
             'file_name': file['name'],
             'file_extension': file['extension'],
             'file_size': file['size'],
             'file_type': file['mime_type'],
             'file_mini_preview': file['mini_preview']
         }
         files.append(new_file)
     return files
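Note: Mattermost stores create_at, update_at and delete_at as millisecond epochs, hence the division by 1000 before calling unixtime_to_datetime; the same pattern recurs in the Meetup and Bitbucket examples below. A minimal sketch of that conversion, assuming unixtime_to_datetime comes from grimoirelab-toolkit's datetime helpers (the helper name here is hypothetical):

    # Hedged helper for the pattern above: a millisecond Unix epoch becomes
    # an ISO 8601 string, with 0 mapped to None the way these examples treat
    # delete_at == 0 ("never deleted").
    def ms_epoch_to_iso(ms_epoch):
        if not ms_epoch:
            return None
        return unixtime_to_datetime(ms_epoch / 1000).isoformat()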
Code example #2
 def add_update_date(self, item):
     """ All item['updated_on'] from perceval is epoch """
     updated = unixtime_to_datetime(item['updated_on'])
     timestamp = unixtime_to_datetime(item['timestamp'])
     item['metadata__updated_on'] = updated.isoformat()
     # Also add timestamp used in incremental enrichment
     item['metadata__timestamp'] = timestamp.isoformat()
Code example #3
def get_to_date(es_in, in_index, out_index, repository_url, interval):
    """ Get the appropriate to_date value for incremental insertion. """
    study_data_available = False

    if es_in.indices.exists(index=out_index):
        last_study_date = es_in.search(index=out_index,
                                       body=get_last_study_date(
                                           repository_url,
                                           interval))["aggregations"]["1"]

        if "value_as_string" in last_study_date and last_study_date[
                "value_as_string"]:
            study_data_available = True
            to_date = str_to_datetime(last_study_date["value_as_string"])
        elif "value" in last_study_date and last_study_date["value"]:
            study_data_available = True
            try:
                to_date = unixtime_to_datetime(last_study_date["value"])
            except Exception:
                to_date = unixtime_to_datetime(last_study_date["value"] / 1000)

    if not study_data_available:
        first_item_date = es_in.search(
            index=in_index, body=get_first_enriched_date(repository_url)
        )["aggregations"]["1"]["hits"]["hits"][0]["_source"]

        to_date = str_to_datetime(first_item_date["metadata__updated_on"])

    return to_date
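Note: the try/except above guards against epochs stored in milliseconds: a 13-digit value is out of range for a seconds-based conversion, so the first call raises and the handler retries after dividing by 1000. The same fallback isolated as a sketch (not the project's API):

    def epoch_to_datetime(value):
        """Convert an epoch that may be in seconds or in milliseconds."""
        try:
            return unixtime_to_datetime(value)         # assume seconds first
        except Exception:
            return unixtime_to_datetime(value / 1000)  # retry as milliseconds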
Code example #4
    def test_dates(self):
        """Check if it converts some timestamps to datetime objects."""

        date = unixtime_to_datetime(0)
        expected = datetime.datetime(1970, 1, 1, 0, 0, 0,
                                     tzinfo=dateutil.tz.tzutc())
        self.assertIsInstance(date, datetime.datetime)
        self.assertEqual(date, expected)

        date = unixtime_to_datetime(1426868155.0)
        expected = datetime.datetime(2015, 3, 20, 16, 15, 55,
                                     tzinfo=dateutil.tz.tzutc())
        self.assertIsInstance(date, datetime.datetime)
        self.assertEqual(date, expected)
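
Note: the tests pin down the contract of unixtime_to_datetime: a Unix timestamp in seconds maps to a timezone-aware UTC datetime. For reference, a standard-library sketch with the same behavior (the real implementation lives in grimoirelab-toolkit):

    import datetime

    def unixtime_to_datetime_sketch(ut):
        # Seconds since the epoch -> timezone-aware UTC datetime,
        # matching the values asserted above.
        return datetime.datetime.fromtimestamp(ut, tz=datetime.timezone.utc)

    assert unixtime_to_datetime_sketch(0) == \
        datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    assert unixtime_to_datetime_sketch(1426868155.0) == \
        datetime.datetime(2015, 3, 20, 16, 15, 55, tzinfo=datetime.timezone.utc)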
Code example #5
 def __get_reactions(message):
     reactions = []
     for reaction in message:
         new_reaction = {
             'reaction_user_id': reaction['user_id'],
             'reaction_post_id': reaction['post_id'],
             'reaction_emoji_name': reaction['emoji_name'],
             'reaction_create_at': unixtime_to_datetime(reaction['create_at'] / 1000).isoformat(),
             'reaction_update_at': unixtime_to_datetime(reaction['update_at'] / 1000).isoformat(),
             'reaction_delete_at': None if reaction['delete_at'] == 0 else unixtime_to_datetime(
                 reaction['delete_at'] / 1000).isoformat()
         }
         reactions.append(new_reaction)
     return reactions
Code example #6
    def get_rich_comment(self, item, answer, comment):
        ecomment = self.get_rich_item(item)  # reuse all fields from item
        ecomment['id'] = str(ecomment['id']) + '_' + str(answer['id']) + '_' + str(comment['id'])
        ecomment['url'] = item['data']['url'] + "/?answer="
        ecomment['url'] += answer['id'] + '#post-id-' + answer['id']
        if 'author' in comment:
            # Not sure if this format is present in some version of askbot
            ecomment['author_askbot_user_name'] = comment['author']['username']
            ecomment['author_askbot_id'] = str(comment['author']['id'])
            ecomment['author_url'] = ecomment['origin'] + '/users/'
            ecomment['author_url'] += comment['author']['id'] + '/' + comment['author']['username']

        elif 'user_display_name' in comment:
            ecomment['author_askbot_user_name'] = comment['user_display_name']
            ecomment['author_askbot_id'] = str(comment['user_id'])
        if 'summary' in comment:
            ecomment['summary'] = comment['summary']
        ecomment['score'] = int(comment['score']) if comment['score'] else 0

        dfield = 'added_at'
        if 'comment_added_at' in comment:
            dfield = 'comment_added_at'

        if self.sortinghat:
            if dfield == 'added_at':
                comment['added_at_date'] = unixtime_to_datetime(float(comment[dfield])).isoformat()
            else:
                comment['added_at_date'] = comment[dfield]
            ecomment.update(self.get_item_sh(comment, date_field="added_at_date"))
            if ecomment['author_user_name'] != ecomment['author_askbot_user_name']:
                logger.warning('Bad SH identity in askbot comment. Found %s expecting %s',
                               ecomment['author_user_name'], ecomment['author_askbot_user_name'])

        if dfield == 'added_at':
            comment_at = unixtime_to_datetime(float(comment[dfield]))
        else:
            comment_at = str_to_datetime(comment[dfield])

        added_at = unixtime_to_datetime(float(item['data']["added_at"]))
        ecomment['time_from_question'] = get_time_diff_days(added_at, comment_at)
        ecomment['type'] = 'comment'
        ecomment.update(self.get_grimoire_fields(comment_at.isoformat(), ecomment['type']))

        # Clean item fields not valid in comments
        for f in ['is_askbot_question', 'author_reputation', 'author_badges', 'is_correct', 'comment_count']:
            if f in ecomment:
                ecomment.pop(f)

        return ecomment
Code example #7
File: scheduler.py Project: sduenas/arthur
    def _handle_successful_job(self, event):
        """Handle successufl jobs"""

        job = rq.job.Job.fetch(event.job_id, connection=self.conn)

        result = job.result
        task_id = job.kwargs['task_id']

        try:
            task = self.registry.get(task_id)
        except NotFoundError:
            logger.warning(
                "Task %s not found; related job #%s will not be rescheduled",
                task_id, job.id)
            return

        if task.archiving_cfg and task.archiving_cfg.fetch_from_archive:
            logger.info("Job #%s (task: %s) successfully finished", job.id,
                        task_id)
            task.status = TaskStatus.COMPLETED
            return

        if result.nitems > 0:
            task.backend_args['next_from_date'] = unixtime_to_datetime(
                result.max_date)

            if result.offset:
                task.backend_args['next_offset'] = result.offset

        delay = task.scheduling_cfg.delay if task.scheduling_cfg else WAIT_FOR_QUEUING

        self._scheduler.schedule_task(task_id, delay=delay)

        logger.info("Task: %s re-scheduled", task_id)
Code example #8
    def run(self, backend_args, archive_args=None, resume=False):
        """Run the backend with the given parameters.

        The method will run the backend assigned to this job,
        storing the fetched items in a Redis queue. The ongoing
        status of the job can be accessed through the property
        `result`. When `resume` is set, the job will start from
        the last execution, overwriting 'from_date' and 'offset'
        parameters, if needed.

        When the parameter `fetch_from_archive` is set to `True`, items
        can be fetched from the archive assigned to this job.

        Any exception during the execution of the process will
        be raised.

        :param backend_args: parameters used to run the backend
        :param archive_args: archive arguments
        :param resume: fetch items starting where the last
            execution stopped
        """
        args = backend_args.copy()

        if archive_args:
            self.initialize_archive_manager(archive_args['archive_path'])

        if not resume:
            max_date = backend_args.get('from_date', None)
            offset = backend_args.get('offset', None)

            if max_date:
                max_date = datetime_to_utc(max_date).timestamp()

            self._result = JobResult(self.job_id,
                                     self.task_id,
                                     self.backend,
                                     self.category,
                                     None,
                                     max_date,
                                     0,
                                     offset=offset,
                                     nresumed=0)
        else:
            if self.result.max_date:
                args['from_date'] = unixtime_to_datetime(self.result.max_date)
            if self.result.offset:
                args['offset'] = self.result.offset
            self._result.nresumed += 1

        for item in self._execute(args, archive_args):
            self.conn.rpush(self.qitems, pickle.dumps(item))

            self._result.nitems += 1
            self._result.last_uuid = item['uuid']

            if not self.result.max_date or self.result.max_date < item[
                    'updated_on']:
                self._result.max_date = item['updated_on']
            if 'offset' in item:
                self._result.offset = item['offset']
Code example #9
File: meetup.py Project: bloriot97/grimoirelab-elk
    def get_rich_item_comments(self, comments, eitem):
        for comment in comments:
            ecomment = copy.deepcopy(eitem)
            created = unixtime_to_datetime(comment['created'] / 1000).isoformat()
            ecomment['url'] = comment['link']
            ecomment['id'] = ecomment['id'] + '_comment_' + str(comment['id'])
            ecomment['comment'] = comment['comment']
            ecomment['like_count'] = comment['like_count']
            ecomment['type'] = 'comment'
            ecomment.update(self.get_grimoire_fields(created, ecomment['type']))
            ecomment.pop('is_meetup_meetup')
            # event host fields: author of the event
            member = comment['member']
            if 'photo' in member:
                ecomment['member_photo_url'] = member['photo']['photo_link']
                ecomment['member_photo_id'] = member['photo']['id']
                ecomment['member_photo_type'] = member['photo']['type']
            if 'event_context' in member:
                ecomment['member_is_host'] = member['event_context']['host']
            ecomment['member_id'] = member['id']
            ecomment['member_name'] = member['name']
            ecomment['member_url'] = "https://www.meetup.com/members/" + str(member['id'])

            if self.sortinghat:
                ecomment.update(self.get_item_sh(comment))

            yield ecomment
Code example #10
File: meetup.py Project: bloriot97/grimoirelab-elk
    def get_rich_item_rsvps(self, rsvps, eitem):
        for rsvp in rsvps:
            ersvp = copy.deepcopy(eitem)
            ersvp['type'] = 'rsvp'
            created = unixtime_to_datetime(rsvp['created'] / 1000).isoformat()
            ersvp.update(self.get_grimoire_fields(created, ersvp['type']))
            ersvp.pop('is_meetup_meetup')
            # event host fields: author of the event
            member = rsvp['member']
            if 'photo' in member:
                ersvp['member_photo_url'] = member['photo']['photo_link']
                ersvp['member_photo_id'] = member['photo']['id']
                ersvp['member_photo_type'] = member['photo']['type']
            ersvp['member_is_host'] = member['event_context']['host']
            ersvp['member_id'] = member['id']
            ersvp['member_name'] = member['name']
            ersvp['member_url'] = "https://www.meetup.com/members/" + str(member['id'])

            ersvp['id'] = ersvp['id'] + '_rsvp_' + str(rsvp['event']['id']) + "_" + str(member['id'])
            ersvp['url'] = "https://www.meetup.com/members/" + str(member['id'])

            ersvp['rsvps_guests'] = rsvp['guests']
            ersvp['rsvps_updated'] = rsvp['updated']
            ersvp['rsvps_response'] = rsvp['response']

            if self.sortinghat:
                ersvp.update(self.get_item_sh(rsvp))

            yield ersvp
Code example #11
    def _handle_successful_job(self, job):
        """Handle successufl jobs"""

        result = job.result
        task_id = job.kwargs['task_id']

        try:
            task = self.registry.get(task_id)
        except NotFoundError:
            logger.warning("Task %s not found; related job #%s will not be rescheduled",
                           task_id, job.id)
            return

        if task.archiving_cfg and task.archiving_cfg.fetch_from_archive:
            logger.info("Job #%s (task: %s) successfully finished", job.id, task_id)
            return

        if result.nitems > 0:
            task.backend_args['next_from_date'] = unixtime_to_datetime(result.max_date)

            if result.offset:
                task.backend_args['next_offset'] = result.offset

        job_args = self._build_job_arguments(task)

        delay = task.scheduling_cfg.delay if task.scheduling_cfg else WAIT_FOR_QUEUING

        job_id = self._scheduler.schedule_job_task(Q_UPDATING_JOBS,
                                                   task_id, job_args,
                                                   delay=delay)

        logger.info("Job #%s (task: %s, old job: %s) re-scheduled",
                    job_id, task_id, job.id)
Code example #12
    def __fetch_pull_requests(self, from_date, to_date):
        """Fetch the pull requests"""

        raw_pulls = self.client.pulls()

        for raw_pull in raw_pulls:
            pulls = json.loads(raw_pull)
            for pull in pulls['values']:

                compare_time = unixtime_to_datetime(float(pull['updatedDate'])/1000)

                if pull['updatedDate'] and to_date < compare_time < from_date:
                    return

                self.__init_extra_pull_fields(pull)

                pull['commits_data'] = self.__get_pull_commits(pull['id'])
                pull['comments_data'] = self.__get_pull_comments(pull['id'], from_date, to_date)

                for field in TARGET_PULL_FIELDS:
                    if field not in pull:
                        continue

                    if field == 'author':
                        pull[field + '_data'] = self.__get_user(pull[field]['user']['slug'])
                    elif field == 'reviewers':
                        for reviewer in pull[field]:
                            pull[field + '_data'].append(self.__get_user(reviewer['user']['slug']))

                yield pull
Code example #13
    def __call__(self, event):
        result = event.payload
        job_id = event.job_id
        task_id = event.task_id

        try:
            task = self.task_scheduler.registry.get(task_id)
        except NotFoundError:
            logger.debug(
                "Task %s not found; orphan event %s for job #%s ignored",
                task_id, event.uuid, job_id)
            return False

        if task.archiving_cfg and task.archiving_cfg.fetch_from_archive:
            task.status = TaskStatus.COMPLETED
            logger.info("Job #%s (task: %s - archiving) finished successfully",
                        job_id, task_id)
            return True

        if result.nitems > 0:
            task.backend_args['next_from_date'] = unixtime_to_datetime(
                result.max_date)

            if result.offset:
                task.backend_args['next_offset'] = result.offset

        delay = task.scheduling_cfg.delay if task.scheduling_cfg else WAIT_FOR_QUEUING

        self.task_scheduler.schedule_task(task_id, delay=delay)

        logger.info("Task: %s re-scheduled", task_id)

        return True
Code example #14
    def get_rich_answer(self, item, answer):
        eanswer = self.get_rich_item(item)  # reuse all fields from item
        eanswer['id'] = str(eanswer['id']) + '_' + str(answer['id'])
        eanswer['url'] = item['data']['url'] + "/?answer="
        eanswer['url'] += answer['id'] + '#post-id-' + answer['id']
        if type(answer['answered_by']) is dict:
            eanswer['author_askbot_user_name'] = answer['answered_by'][
                'username']
            eanswer['author_askbot_id'] = str(answer['answered_by']['id'])
            eanswer['author_badges'] = answer['answered_by']['badges']
            eanswer['author_reputation'] = int(
                answer['answered_by']['reputation'])
            eanswer['author_url'] = eanswer['origin'] + '/users/'
            eanswer['author_url'] += answer['answered_by']['id'] + '/'
            eanswer['author_url'] += answer['answered_by']['username']

        eanswer['summary'] = answer['summary']
        eanswer['is_accepted_answer'] = 1 if answer['accepted'] else 0
        eanswer['answer_status'] = "accepted" if answer[
            'accepted'] else "not_accepted"
        eanswer['score'] = int(answer['score']) if answer['score'] else 0
        if 'is_correct' in answer:
            eanswer['is_correct'] = 1

        if self.sortinghat:
            answer['added_at_date'] = unixtime_to_datetime(
                float(answer["added_at"])).isoformat()
            eanswer.update(self.get_item_sh(answer,
                                            date_field="added_at_date"))
            if 'author_askbot_user_name' in eanswer and eanswer[
                    'author_user_name'] != eanswer['author_askbot_user_name']:
                logger.warning(
                    '[askbot] Bad SH identity in askbot answer. Found {} expecting {}'
                    .format(eanswer['author_user_name'],
                            eanswer['author_askbot_user_name']))
        answer_at = unixtime_to_datetime(float(answer["added_at"]))
        added_at = unixtime_to_datetime(float(item['data']["added_at"]))
        eanswer['time_from_question'] = get_time_diff_days(added_at, answer_at)
        eanswer['type'] = 'answer'
        eanswer.update(
            self.get_grimoire_fields(answer_at.isoformat(), eanswer['type']))

        # Clean item fields not valid in answers
        eanswer.pop('is_askbot_question')

        return eanswer
Code example #15
    def __convert_str_to_datetime(text):
        try:
            str_date = str_to_datetime(text)
        except Exception:
            try:
                str_date = unixtime_to_datetime(text)
            except Exception:
                str_date = None

        return str_date
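Note: this defensive converter accepts either a date-like string or a raw epoch and returns None when neither parses. A standalone copy to illustrate the fallback order (str_to_datetime and unixtime_to_datetime being the grimoirelab-toolkit helpers used throughout these examples):

    def convert_str_to_datetime(text):
        try:
            return str_to_datetime(text)           # date-like strings first
        except Exception:
            try:
                return unixtime_to_datetime(text)  # then raw Unix epochs
            except Exception:
                return None                        # unparseable input

    convert_str_to_datetime("2015-03-20T16:15:55")  # parsed as a date string
    convert_str_to_datetime(1426868155)             # parsed as a Unix epoch
    convert_str_to_datetime("not a date")           # None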
Code example #16
 def get_time_to_first_attention(self, item):
     """Get the first date at which a comment was made to the issue by someone
     other than the user who created the issue
     """
     comment_dates = [
         unixtime_to_datetime(float(comment['date_created'])).isoformat()
         for comment in item['comments']
         if item['user']['name'] != comment['user']['name']
     ]
     if comment_dates:
         return min(comment_dates)
     return None
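Note: min() returns the earliest date here because the list holds isoformat() strings of UTC datetimes, whose lexicographic order matches chronological order.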
Code example #17
    def _fix_review_dates(self, item):
        """Convert dates so ES detect them"""

        for date_field in ['timestamp', 'createdOn', 'lastUpdated']:
            if date_field in item.keys():
                date_ts = item[date_field]
                item[date_field] = unixtime_to_datetime(date_ts).isoformat()

        if 'patchSets' in item.keys():
            for patch in item['patchSets']:
                pdate_ts = patch['createdOn']
                patch['createdOn'] = unixtime_to_datetime(pdate_ts).isoformat()

                if 'approvals' in patch:
                    for approval in patch['approvals']:
                        adate_ts = approval['grantedOn']
                        approval['grantedOn'] = unixtime_to_datetime(adate_ts).isoformat()

        if 'comments' in item.keys():
            for comment in item['comments']:
                cdate_ts = comment['timestamp']
                comment['timestamp'] = unixtime_to_datetime(cdate_ts).isoformat()
Code example #18
    def update(self, item):
        """Update the summary attributes by accessing the item data.

        :param item: a Perceval item
        """
        self.fetched += 1
        self.last_uuid = item['uuid']

        updated_on = unixtime_to_datetime(item['updated_on'])
        self.min_updated_on = updated_on if not self.min_updated_on else min(self.min_updated_on, updated_on)
        self.max_updated_on = updated_on if not self.max_updated_on else max(self.max_updated_on, updated_on)
        self.last_updated_on = updated_on

        offset = item.get('offset', None)
        if offset is not None:
            self.last_offset = offset
            self.min_offset = offset if self.min_offset is None else min(self.min_offset, offset)
            self.max_offset = offset if self.max_offset is None else max(self.max_offset, offset)
Code example #19
    def __fetch_merge_requests(self, from_date):
        """Fetch the merge requests."""

        fetch_completed = False
        fetch_from_date = from_date
        last_date = fetch_from_date

        while not fetch_completed:
            try:
                for mr_item in self.__fetch_merge_requests_data(fetch_from_date):
                    last_date = unixtime_to_datetime(self.metadata_updated_on(mr_item))
                    yield mr_item
            except _OutdatedMRsList:
                fetch_from_date = last_date
                logger.debug("MRs list is outdated. Recalculating MR list starting on %s",
                             fetch_from_date)
            else:
                fetch_completed = True
Code example #20
File: meetup.py Project: bloriot97/grimoirelab-elk
    def get_item_sh(self, item):
        """ Add sorting hat enrichment fields  """

        sh_fields = {}

        # The common get_item_sh is not shared here because this logic is quite specific
        if 'member' in item:
            # comment and rsvp
            identity = self.get_sh_identity(item['member'])
        elif 'event_hosts' in item:
            # meetup event
            identity = self.get_sh_identity(item['event_hosts'][0])
        else:
            return sh_fields

        created = unixtime_to_datetime(item['created'] / 1000)
        sh_fields = self.get_item_sh_fields(identity, created)

        return sh_fields
Code example #21
    def test_delete_items_wrong_retention(self):
        """Test whether no items are deleted if retention isn't defined or negative"""

        items = json.loads(read_file('data/git.json'))
        for item in items:
            timestamp = unixtime_to_datetime(item['timestamp'])
            item['timestamp'] = timestamp.isoformat()

        elastic = ElasticSearch(self.es_con, self.target_index, GitOcean.mapping)
        new_items = elastic.bulk_upload(items, field_id="uuid")
        self.assertEqual(new_items, 11)

        url = self.es_con + '/' + self.target_index + '/_count'

        elastic.delete_items(retention_time=None, time_field='timestamp')
        left_items = elastic.requests.get(url).json()['count']
        self.assertEqual(left_items, 11)

        elastic.delete_items(retention_time=-1, time_field='timestamp')
        left_items = elastic.requests.get(url).json()['count']
        self.assertEqual(left_items, 11)
Code example #22
    def test_delete_items(self):
        """Test whether items are correctly deleted"""

        items = json.loads(read_file('data/git.json'))
        for item in items:
            timestamp = unixtime_to_datetime(item['timestamp'])
            item['timestamp'] = timestamp.isoformat()

        elastic = ElasticSearch(self.es_con, self.target_index,
                                GitOcean.mapping)
        new_items = elastic.bulk_upload(items, field_id="uuid")
        self.assertEqual(new_items, 9)

        url = self.es_con + '/' + self.target_index + '/_count'

        elastic.delete_items(retention_time=90000000, time_field='timestamp')
        left_items = elastic.requests.get(url).json()['count']
        self.assertEqual(left_items, 9)

        elastic.delete_items(retention_time=1, time_field='timestamp')
        left_items = elastic.requests.get(url).json()['count']
        self.assertEqual(left_items, 0)
Code example #23
    def __get_pull_comments(self, pr_number, from_date, to_date):

        comments = []
        group_pull_comments = self.client.pull_comments(pr_number)

        for raw_pull_comments in group_pull_comments:
            group_pull_comments = json.loads(raw_pull_comments)

            for comment in group_pull_comments['values']:

                compare_time = unixtime_to_datetime(float(comment['createdDate']) / 1000)

                if to_date < compare_time < from_date:
                    return comments

                if 'comment' not in comment:
                    continue

                if 'user' in comment:
                    comment['user_data'] = self.__get_user(comment['user']['slug'])

                comments.append(comment)

            return comments
Code example #24
    def get_rich_item(self, item):
        eitem = {}

        self.__fill_phab_ids(item['data'])

        for f in self.RAW_FIELDS_COPY:
            if f in item:
                eitem[f] = item[f]
            else:
                eitem[f] = None
        # The real data
        phab_item = item['data']

        # data fields to copy
        copy_fields = ["phid", "id", "type"]
        for f in copy_fields:
            if f in phab_item:
                eitem[f] = phab_item[f]
            else:
                eitem[f] = None
        # Fields whose names are translated
        map_fields = {
            "id": "bug_id"
        }
        for f in map_fields:
            if f in phab_item:
                eitem[map_fields[f]] = phab_item[f]
            else:
                eitem[map_fields[f]] = None

        eitem['num_changes'] = len(phab_item['transactions'])

        if 'authorData' in phab_item['fields'] and phab_item['fields']['authorData']:
            # eitem['author_roles'] = ",".join(phab_item['fields']['authorData']['roles'])
            eitem['author_roles'] = phab_item['fields']['authorData']['roles']
            eitem['author_userName'] = phab_item['fields']['authorData']['userName']
            eitem['author_realName'] = phab_item['fields']['authorData']['realName']
        if 'ownerData' in phab_item['fields'] and phab_item['fields']['ownerData']:
            eitem['assigned_to_roles'] = phab_item['fields']['ownerData']['roles']
            eitem['assigned_to_userName'] = phab_item['fields']['ownerData']['userName']
            eitem['assigned_to_realName'] = phab_item['fields']['ownerData']['realName']

        eitem['priority'] = phab_item['fields']['priority']['name']
        eitem['priority_value'] = phab_item['fields']['priority']['value']
        eitem['status'] = phab_item['fields']['status']['name']
        eitem['creation_date'] = unixtime_to_datetime(phab_item['fields']['dateCreated']).isoformat()
        eitem['modification_date'] = unixtime_to_datetime(phab_item['fields']['dateModified']).isoformat()
        eitem['update_date'] = unixtime_to_datetime(item['updated_on']).isoformat()
        eitem['main_description'] = phab_item['fields']['name']
        eitem['main_description_analyzed'] = eitem['main_description']
        eitem['url'] = eitem['origin'] + "/T" + str(eitem['bug_id'])

        # Time to assign (time to open -> time to assign)
        eitem['time_to_assign_days'] = None
        # Time to attend (time to assign-> time to first activity from assignee)
        eitem['time_to_attend_days'] = None
        # Time to close (time open -> time last updated for closed tasks)
        # We can improve it later using events: time open event -> time resolved event
        eitem['time_to_close_days'] = None
        if eitem['status'] not in [TASK_OPEN_STATUS, 'Spite', 'Stalled']:
            eitem['time_to_close_days'] = \
                get_time_diff_days(eitem['creation_date'], eitem['update_date'])
        # Time open (time to open -> now): with painless
        # Time open using the enrich date. Field needed for filtering.
        eitem['time_open_days_enrich'] = get_time_diff_days(eitem['creation_date'],
                                                            datetime_utcnow().replace(tzinfo=None))
        # Time from last update (time last update -> now): with painless

        eitem['changes'] = len(phab_item['transactions'])
        # Number of assignments changes
        eitem['changes_assignment'] = 0
        # Number of assignees in the changes
        eitem['changes_assignee_number'] = 0
        # List the changes assignees
        changes_assignee_list = []
        first_assignee_phid = None
        first_assignee_date = None
        # We need to reverse them to go from older to newer
        phab_item['transactions'].reverse()
        for change in phab_item['transactions']:
            change_date = unixtime_to_datetime(float(change['dateCreated'])).isoformat()
            if change["transactionType"] == "reassign":
                if not eitem['time_to_assign_days']:
                    eitem['time_to_assign_days'] = get_time_diff_days(eitem['creation_date'], change_date)
                    first_assignee_phid = change['newValue']
                    first_assignee_date = change_date
                if 'authorData' in change and change['authorData'] and 'userName' in change['authorData'] \
                        and change['authorData']['userName'] not in changes_assignee_list:
                    changes_assignee_list.append(change['authorData']['userName'])
                eitem['changes_assignment'] += 1
            if not eitem['time_to_attend_days'] and first_assignee_phid:
                if 'authorData' in change and change['authorData'] and change['authorData']['phid'] == first_assignee_phid:
                    eitem['time_to_attend_days'] = get_time_diff_days(first_assignee_date, change_date)
        eitem['changes_assignee_number'] = len(changes_assignee_list)
        eitem['changes_assignee_list'] = ','.join(changes_assignee_list)
        eitem['comments'] = 0
        for tr in phab_item['transactions']:
            if tr['comments']:
                eitem['comments'] += 1

        eitem['tags'] = []
        for project in phab_item['projects']:
            if project:
                eitem['tags'].append(project['name'])
        eitem['tags_analyzed'] = eitem['tags']
        eitem['tags_custom_analyzed'] = eitem['tags']

        if self.sortinghat:
            eitem.update(self.get_item_sh(item, self.roles))

        if self.prjs_map:
            eitem.update(self.get_item_project(eitem))

        eitem.update(self.get_grimoire_fields(eitem['creation_date'], "task"))

        # Support old fields used in maniphest panel T2305
        eitem['timeopen_days'] = eitem['time_open_days_enrich']
        assigned_to = {}
        for f in eitem.keys():
            if 'ownerData' in f:
                # Copy all ownerData data fields to assigned_to fields
                of = f.split('ownerData')[1]
                assigned_to['assigned_to' + of] = eitem[f]
        eitem.update(assigned_to)

        self.add_repository_labels(eitem)
        self.add_metadata_filter_raw(eitem)
        return eitem
Code example #25
    def get_rich_item(self, item):
        eitem = {}

        for f in self.RAW_FIELDS_COPY:
            if f in item:
                eitem[f] = item[f]
            else:
                eitem[f] = None

        # The real data
        message = item['data']

        eitem["reply_count"] = 0  # be sure it is always included

        # data fields to copy
        copy_fields = [
            "text", "type", "reply_count", "subscribed", "subtype",
            "unread_count", "user"
        ]
        for f in copy_fields:
            if f in message:
                eitem[f] = message[f]
            else:
                eitem[f] = None

        eitem['text_analyzed'] = eitem['text']

        eitem['number_attachs'] = 0
        if 'attachments' in message and message['attachments']:
            eitem['number_attachs'] = len(message['attachments'])

        eitem['reaction_count'] = 0
        if 'reactions' in message:
            eitem['reaction_count'] = len(message['reactions'])
            eitem['reactions'] = []
            for rdata in message['reactions']:
                for i in range(0, rdata['count']):
                    eitem['reactions'].append(rdata["name"])

        if 'file' in message:
            eitem['file_type'] = message['file']['pretty_type']
            eitem['file_title'] = message['file']['title']
            eitem['file_size'] = message['file']['size']
            eitem['file_name'] = message['file']['name']
            eitem['file_mode'] = message['file']['mode']
            eitem['file_is_public'] = message['file']['is_public']
            eitem['file_is_external'] = message['file']['is_external']
            eitem['file_id'] = message['file']['id']
            eitem['file_is_editable'] = message['file']['editable']

        if 'user_data' in message:
            eitem['team_id'] = None  # does not exist in Mattermost
            if 'timezone' in message['user_data']:
                if message['user_data']['timezone']['useAutomaticTimezone']:
                    eitem['tz'] = message['user_data']['timezone'][
                        'automaticTimezone']
                else:
                    eitem['tz'] = message['user_data']['timezone'][
                        'manualTimezone']
                # tz must be in -12h to 12h interval, so seconds -> hours
                if eitem['tz']:
                    eitem['tz'] = round(int(eitem['tz']) / (60 * 60))
            if 'is_admin' in message['user_data']:
                eitem['is_admin'] = message['user_data']['is_admin']
            if 'is_owner' in message['user_data']:
                eitem['is_owner'] = message['user_data']['is_owner']
            if 'is_primary_owner' in message['user_data']:
                eitem['is_primary_owner'] = message['user_data'][
                    'is_primary_owner']
            if 'profile' in message['user_data']:
                if 'title' in message['user_data']['profile']:
                    eitem['profile_title'] = message['user_data']['profile'][
                        'title']
                eitem['avatar'] = message['user_data']['profile']['image_32']

        eitem['channel_name'] = message['channel_data']['name']
        eitem['channel_id'] = message['channel_data']['id']
        eitem['channel_created'] = unixtime_to_datetime(
            message['channel_data']['create_at'] / 1000).isoformat()
        eitem['channel_member_count'] = None

        eitem = self.__convert_booleans(eitem)

        if self.sortinghat:
            eitem.update(self.get_item_sh(item))

        if self.prjs_map:
            eitem.update(self.get_item_project(eitem))

        eitem.update(
            self.get_grimoire_fields(item["metadata__updated_on"], "message"))

        self.add_repository_labels(eitem)
        self.add_metadata_filter_raw(eitem)
        return eitem
Code example #26
    def get_rich_events(self, item):
        """
        In the events there are some common fields with the task. The name
        of the field must be the same in the task and in the event
        so we can filer using it in task and event at the same time.

        * Fields that don't change: the field does not change with the events
        in a task so the value is always the same in the events of a task.

        * Fields that change: the value of teh field changes with events
        """
        # To get values from the task
        eitem = self.get_rich_item(item)

        # Fields that never change
        task_fields_nochange = ['author_userName', 'creation_date', 'url', 'id', 'bug_id']

        # Follow changes in these fields
        task_fields_change = ['priority_value', 'status', 'assigned_to_userName', 'tags_custom_analyzed']
        task_change = {}
        for f in task_fields_change:
            task_change[f] = None
        task_change['status'] = TASK_OPEN_STATUS
        task_change['tags_custom_analyzed'] = eitem['tags_custom_analyzed']

        # Events are in transactions field (changes in fields)
        transactions = item['data']['transactions']

        if not transactions:
            return []

        for t in transactions:
            event = {}
            # Needed for incremental updates from the item
            event['metadata__updated_on'] = item['metadata__updated_on']
            event['origin'] = item['origin']
            # Real event data
            event['transactionID'] = t['transactionID']
            event['type'] = t['transactionType']
            event['username'] = None
            if 'authorData' in t and 'userName' in t['authorData']:
                event['event_author_name'] = t['authorData']['userName']
            event['update_date'] = unixtime_to_datetime(float(t['dateCreated'])).isoformat()
            event['oldValue'] = ''
            event['newValue'] = ''
            if event['type'] == 'core:edge':
                for val in t['oldValue']:
                    if val in self.phab_ids_names:
                        val = self.phab_ids_names[val]
                    event['oldValue'] += "," + val
                event['oldValue'] = event['oldValue'][1:]  # remove first comma
                for val in t['newValue']:
                    if val in self.phab_ids_names:
                        val = self.phab_ids_names[val]
                    event['newValue'] += "," + val
                event['newValue'] = event['newValue'][1:]  # remove first comma
            elif event['type'] in ['status', 'description', 'priority', 'reassign', 'title', 'space', 'core:create', 'parent']:
                # Convert to str so the field is always a string
                event['oldValue'] = str(t['oldValue'])
                if event['oldValue'] in self.phab_ids_names:
                    event['oldValue'] = self.phab_ids_names[event['oldValue']]
                event['newValue'] = str(t['newValue'])
                if event['newValue'] in self.phab_ids_names:
                    event['newValue'] = self.phab_ids_names[event['newValue']]
            elif event['type'] == 'core:comment':
                event['newValue'] = t['comments']
            elif event['type'] == 'core:subscribers':
                event['newValue'] = ",".join(t['newValue'])
            else:
                # logger.debug("Event type %s old to new value not supported", t['transactionType'])
                pass

            for f in task_fields_nochange:
                # The field name must be the same as in the task for filtering
                event[f] = eitem[f]

            # To track history of some fields
            if event['type'] in ['status']:
                task_change['status'] = event['newValue']
            elif event['type'] == 'priority':
                task_change['priority'] = event['newValue']
            elif event['type'] == 'core:edge':
                task_change['tags_custom_analyzed'] = [event['newValue']]
            if event['type'] in ['reassign']:
                # Try to get the userName and not the user id
                if event['newValue'] in self.phab_ids_names:
                    task_change['assigned_to_userName'] = self.phab_ids_names[event['newValue']]
                    event['newValue'] = task_change['assigned_to_userName']
                else:
                    task_change['assigned_to_userName'] = event['newValue']
                if event['oldValue'] in self.phab_ids_names:
                    # Try to get the userName and not the user id
                    event['oldValue'] = self.phab_ids_names[event['oldValue']]

            for f in task_change:
                event[f] = task_change[f]

            yield event
Code example #27
    def get_last_item_field(self, field, filters_=[], offset=False):
        """Find the offset/date of the last item stored in the index.

        :param field: field with the data
        :param filters_: additional filters to find the date
        :param offset: if True, returns the offset field instead of date field
        """
        last_value = None

        url = self.index_url
        url += "/_search"

        if filters_ is None:
            filters_ = []

        terms = []
        for filter_ in filters_:
            if not filter_:
                continue
            term = '''{"term" : { "%s" : "%s"}}''' % (filter_['name'],
                                                      filter_['value'])
            terms.append(term)

        data_query = '''"query": {"bool": {"filter": [%s]}},''' % (
            ','.join(terms))

        data_agg = '''
            "aggs": {
                "1": {
                  "max": {
                    "field": "%s"
                  }
                }
            }
        ''' % field

        data_json = '''
        { "size": 0, %s  %s
        } ''' % (data_query, data_agg)

        logger.debug("{} {}".format(anonymize_url(url), data_json))

        headers = {"Content-Type": "application/json"}

        res = self.requests.post(url, data=data_json, headers=headers)
        res.raise_for_status()
        res_json = res.json()

        if 'aggregations' in res_json:
            last_value = res_json["aggregations"]["1"]["value"]

            if offset:
                if last_value is not None:
                    last_value = int(last_value)
            else:
                if "value_as_string" in res_json["aggregations"]["1"]:
                    last_value = res_json["aggregations"]["1"][
                        "value_as_string"]
                    last_value = str_to_datetime(last_value)
                else:
                    last_value = res_json["aggregations"]["1"]["value"]
                    if last_value:
                        try:
                            last_value = unixtime_to_datetime(last_value)
                        except InvalidDateError:
                            # last_value is in milliseconds
                            last_value = unixtime_to_datetime(last_value / 1000)
        return last_value
Code example #28
File: meetup.py Project: bloriot97/grimoirelab-elk
    def get_rich_item(self, item):
        # We need to detect the category of item: activities (report), events or users
        eitem = {}

        if 'time' not in item['data']:
            logger.warning("[meetup] Not processing %s: no time field", item['uuid'])
            return eitem

        for f in self.RAW_FIELDS_COPY:
            if f in item:
                eitem[f] = item[f]
            else:
                eitem[f] = None

        event = item['data']

        # data fields to copy
        copy_fields = ["id", "how_to_find_us"]
        for f in copy_fields:
            if f in event:
                eitem[f] = event[f]
            else:
                eitem[f] = None

        # Fields whose names are translated
        map_fields = {
            "link": "url",
            "rsvp_limit": "rsvps_limit"
        }
        for fn in map_fields:
            if fn in event:
                eitem[map_fields[fn]] = event[fn]
            else:
                eitem[map_fields[fn]] = None

        # event host fields: author of the event
        if 'event_hosts' in event:
            host = event['event_hosts'][0]
            if 'photo' in host:
                eitem['member_photo_url'] = host['photo']['photo_link']
                eitem['member_photo_id'] = host['photo']['id']
                eitem['member_photo_type'] = host['photo']['type']
            eitem['member_is_host'] = True
            eitem['member_id'] = host['id']
            eitem['member_name'] = host['name']
            eitem['member_url'] = "https://www.meetup.com/members/" + str(host['id'])

        eitem['event_url'] = event['link']

        # data fields to copy with the meetup_ prefix
        copy_fields = ["description", "plain_text_description",
                       "name", "status", "utc_offset", "visibility",
                       "waitlist_count", "yes_rsvp_count", "duration",
                       "featured", "rsvpable"]
        copy_fields_time = ["time", "updated", "created"]

        for f in copy_fields:
            if f in event:
                eitem["meetup_" + f] = event[f]
            else:
                eitem["meetup_" + f] = None

        for f in copy_fields_time:
            if f in event:
                eitem["meetup_" + f] = unixtime_to_datetime(event[f] / 1000).isoformat()
            else:
                eitem["meetup_" + f] = None

        rsvps = event.get('rsvps', [])

        eitem['num_rsvps'] = len(rsvps)
        eitem['num_comments'] = len(event['comments'])

        try:
            if 'time' in event:
                eitem['time_date'] = unixtime_to_datetime(event['time'] / 1000).isoformat()
            else:
                logger.warning("time field nof found in event")
                return {}
        except ValueError:
            logger.warning("Wrong datetime for %s: %s", eitem['url'], event['time'])
            # If no datetime for the enriched item, it is useless for Kibana
            return {}

        if 'venue' in event:
            venue = event['venue']
            copy_fields = ["id", "name", "city", "state", "zip", "country",
                           "localized_country_name", "repinned", "address_1"]
            for f in copy_fields:
                if f in venue:
                    eitem["venue_" + f] = venue[f]
                else:
                    eitem["venue_" + f] = None

            eitem['venue_geolocation'] = {
                "lat": event['venue']['lat'],
                "lon": event['venue']['lon'],
            }

        if 'series' in event:
            eitem['series_id'] = event['series']['id']
            eitem['series_description'] = event['series']['description']
            eitem['series_start_date'] = event['series']['start_date']

        if 'group' in event:
            group = event['group']
            copy_fields = ["id", "created", "join_mode", "name", "url_name",
                           "who"]
            for f in copy_fields:
                if f in group:
                    eitem["group_" + f] = group[f]
                else:
                    eitem["group_" + f] = None

            eitem['group_geolocation'] = {
                "lat": group['lat'],
                "lon": group['lon'],
            }

            if eitem['group_created']:
                eitem['group_created'] = unixtime_to_datetime(eitem['group_created'] / 1000).isoformat()

            eitem['group_topics'] = []
            eitem['group_topics_keys'] = []
            if 'topics' in group:
                group_topics = [topic['name'] for topic in group['topics']]
                group_topics_keys = [topic['urlkey'] for topic in group['topics']]
                eitem['group_topics'] = group_topics
                eitem['group_topics_keys'] = group_topics_keys

        if len(rsvps) > 0:
            eitem['group_members'] = rsvps[0]['group']['members']

        created = unixtime_to_datetime(event['created'] / 1000).isoformat()
        eitem['type'] = "meetup"
        # time_date is when the meetup will take place, the date needed in this index;
        # created is when the meetup entry was created, which is not the relevant date here
        eitem.update(self.get_grimoire_fields(eitem['time_date'], eitem['type']))

        if self.sortinghat:
            eitem.update(self.get_item_sh(event))

        if self.prjs_map:
            eitem.update(self.get_item_project(eitem))

        self.add_repository_labels(eitem)
        self.add_metadata_filter_raw(eitem)
        return eitem
Code example #29
    def get_rich_item(self, item, kind='question', question_tags=None):
        eitem = {}

        # Fields common in questions and answers
        common_fields = ["title", "comment_count", "question_id",
                         "delete_vote_count", "up_vote_count",
                         "down_vote_count", "favorite_count", "view_count",
                         "last_activity_date", "link", "score", "tags"]

        if kind == 'question':
            for f in self.RAW_FIELDS_COPY:
                if f in item:
                    eitem[f] = item[f]
                else:
                    eitem[f] = None
            # The real data
            question = item['data']

            eitem["item_id"] = question['question_id']
            eitem["type"] = 'question'
            eitem["author"] = None
            if 'owner' in question and question['owner']['user_type'] == "does_not_exist":
                logger.warning("question without owner: %s", question['question_id'])
            else:
                eitem["author"] = question['owner']['display_name']
                eitem["author_link"] = None
                if 'link' in question['owner']:
                    eitem["author_link"] = question['owner']['link']
                eitem["reputation"] = None
                if 'reputation' in question['owner']:
                    eitem["author_reputation"] = question['owner']['reputation']

            # data fields to copy
            copy_fields = common_fields + ['answer_count']
            for f in copy_fields:
                if f in question:
                    eitem[f] = question[f]
                else:
                    eitem[f] = None

            eitem["question_tags"] = question['tags']
            # eitem["question_tags_custom_analyzed"] = question['tags']

            # Fields whose names are translated
            map_fields = {"title": "question_title"}
            for fn in map_fields:
                eitem[map_fields[fn]] = question[fn]
            eitem['title_analyzed'] = question['title']

            eitem['question_has_accepted_answer'] = 0
            eitem['question_accepted_answer_id'] = None

            if question['answer_count'] >= 1 and 'answers' not in question:
                logger.warning("Missing answers for question %s", question['question_id'])
            elif question['answer_count'] >= 1 and 'answers' in question:
                answers_id = [p['answer_id'] for p in question['answers']
                              if 'is_accepted' in p and p['is_accepted']]
                eitem['question_accepted_answer_id'] = answers_id[0] if answers_id else None
                eitem['question_has_accepted_answer'] = 1 if eitem['question_accepted_answer_id'] else 0

            creation_date = unixtime_to_datetime(question["creation_date"]).isoformat()
            eitem['creation_date'] = creation_date
            eitem.update(self.get_grimoire_fields(creation_date, "question"))

            if self.sortinghat:
                eitem.update(self.get_item_sh(item))

            if self.prjs_map:
                eitem.update(self.get_item_project(eitem))

            self.add_repository_labels(eitem)
            self.add_metadata_filter_raw(eitem)

        elif kind == 'answer':
            answer = item

            eitem["type"] = 'answer'
            eitem["author"] = answer['owner']['display_name']
            eitem["author_link"] = None
            eitem["item_id"] = answer['answer_id']
            if 'link' in answer['owner']:
                eitem["author_link"] = answer['owner']['link']
            if 'reputation' in answer['owner']:
                eitem["author_reputation"] = answer['owner']['reputation']

            # data fields to copy
            copy_fields = common_fields + ["origin", "tag", "creation_date", "is_accepted", "answer_id"]
            for f in copy_fields:
                if f in answer:
                    eitem[f] = answer[f]
                else:
                    eitem[f] = None

            eitem['is_accepted_answer'] = 1 if answer['is_accepted'] else 0
            eitem['answer_status'] = "accepted" if answer['is_accepted'] else "not_accepted"

            eitem["question_tags"] = question_tags
            if 'tags' in answer:
                eitem["answer_tags"] = answer['tags']

            # Fields whose names are translated
            map_fields = {"title": "question_title"}
            for fn in map_fields:
                eitem[map_fields[fn]] = answer[fn]

            creation_date = unixtime_to_datetime(answer["creation_date"]).isoformat()
            eitem['creation_date'] = creation_date
            eitem.update(self.get_grimoire_fields(creation_date, "answer"))

            if self.sortinghat:
                # date field must be the same as in question to share code
                answer[self.get_field_date()] = eitem['creation_date']
                eitem[self.get_field_date()] = eitem['creation_date']
                eitem.update(self.get_item_sh(answer))

            if self.prjs_map:
                eitem.update(self.get_item_project(eitem))

        return eitem
Code example #30
    def get_rich_item(self, item):
        eitem = {}

        for f in self.RAW_FIELDS_COPY:
            if f in item:
                eitem[f] = item[f]
            else:
                eitem[f] = None

        question = item['data']

        if 'accepted_answer_id' not in question:
            question['accepted_answer_id'] = None

        # Fields that are the same in item and eitem
        copy_fields = ["id", "url", "title", "summary", "score"]
        for f in copy_fields:
            if f in question:
                eitem[f] = question[f]
            else:
                eitem[f] = None
        # Fields which names are translated
        map_fields = {
            "title": "question_title",
            "answer_count": "question_answer_count",
            "view_count": "question_view_count",
            "answer_ids": "question_answer_ids"
        }
        for fn in map_fields:
            if fn in question:
                eitem[map_fields[fn]] = question[fn]
            else:
                eitem[map_fields[fn]] = None

        # Cast id of question to string
        eitem['id'] = str(eitem['id'])
        eitem['score'] = int(eitem['score']) if eitem['score'] else 0

        # First answer time
        added_at = unixtime_to_datetime(float(question["added_at"]))
        eitem['time_to_reply'] = None
        if 'answers' in question:
            # answers ordered by time
            first_answer_time = unixtime_to_datetime(
                float(question['answers'][0]["added_at"]))
            eitem['time_to_reply'] = get_time_diff_days(
                added_at, first_answer_time)
            eitem['question_has_accepted_answer'] = 1 if question[
                'accepted_answer_id'] else 0
            eitem['question_accepted_answer_id'] = question[
                'accepted_answer_id']
        else:
            eitem['question_has_accepted_answer'] = 0

        if question['author'] and type(question['author']) is dict:
            eitem['author_askbot_user_name'] = question['author']['username']
            eitem['author_askbot_id'] = str(question['author']['id'])
            eitem['author_badges'] = question['author']['badges']
            eitem['author_reputation'] = int(question['author']['reputation'])
            eitem['author_url'] = eitem['origin'] + '/users/'
            eitem['author_url'] += question['author']['id'] + '/' + question[
                'author']['username']

        eitem['question_last_activity_at'] = unixtime_to_datetime(
            float(question['last_activity_at'])).isoformat()
        eitem['question_last_activity_by_id'] = question['last_activity_by'][
            'id']
        eitem['question_last_activity_by_username'] = question[
            'last_activity_by']['username']
        # A list can be used directly to filter in kibana
        eitem['question_tags'] = question['tags']
        eitem['question_answer_ids'] = question['answer_ids']

        eitem['comment_count'] = 0
        if 'answers' in question:
            eitem['comment_count'] = sum([
                len(a['comments']) if 'comments' in a else 0
                for a in question['answers']
            ])

        if self.sortinghat:
            eitem.update(self.get_item_sh(item))

        if self.prjs_map:
            eitem.update(self.get_item_project(eitem))

        eitem["type"] = "question"
        eitem.update(
            self.get_grimoire_fields(added_at.isoformat(), eitem["type"]))

        self.add_repository_labels(eitem)
        self.add_metadata_filter_raw(eitem)
        return eitem