예제 #1
0
class GmailHistory(object):
    """
    Fetches messages from a user's mailbox through the Gmail API and forwards
    every message to the configured targets.

    Progress is tracked in a DiskDict created at ``status_path + "disk.dict"``
    using the keys ``last_msg_ts``, ``frst_msg_ts``, ``tmp_ts`` and ``historyId``.

    :ref : https://developers.google.com/gmail/api/v1/reference/

    """

    MAX_RESULTS = 500  # gmail api max results
    LABELIDS = ["INBOX"]  # labels for which pub/sub updates are to be pushed
    GMAIL_CREATED_TS = "2004/01/01"  # year in which gmail was introduced
    GMAIL_WATCH_DELAY = 86400  # time in sec between two gmail api watch() requests
    SCOPES = ("https://www.googleapis.com/auth/gmail.readonly"
              )  # type of permission to access gmail api

    def __init__(
        self,
        cred_path=None,
        topic_name=None,
        query="",
        file_path=None,
        status_path="/tmp/",
        targets=None,
        log=DUMMY_LOG,
    ):
        """
        :param cred_path : str, prefix prepended to 'credentials.json' / 'client_secret.json'
        :param topic_name : str, pub/sub topic name sent with the watch() request
        :param query : str, gmail search query prepended to the date filter
        :param file_path : str, prefix prepended to attachment filenames; attachments
                           are only saved when this is set
        :param status_path : str, prefix prepended to 'disk.dict'
        :param targets : iterable of objects exposing insert_msg(msg)
        :param log : logger object

        """

        self.log = log
        self.cred_path = cred_path
        self.query = query
        self.gmail = None  # set by authorize()
        self.topic = topic_name
        self.file_path = file_path
        self.dd = DiskDict(status_path + "disk.dict")
        self.targets = targets
        self._pool = ThreadPool()

    def authorize(self):
        """

        Gets valid user credentials from the user specified cred path.
        If nothing has been stored, or if the stored credentials are invalid,
        the OAuth2 flow is run to obtain new credentials.

        client_secret.json : This is the name of the secret file you download from
                             https://console.developers.google.com/iam-admin/projects

        credentials.json : This is the file that will be created when user has authenticated and
                           will mean you don't have to re-authenticate each time you connect to the API

        :rtype : gmail service object (also stored on self.gmail)
        """
        self.log.debug("authorize")

        store = file.Storage("{}credentials.json".format(self.cred_path))
        creds = store.get()

        if not creds or creds.invalid:
            flow = client.flow_from_clientsecrets(
                "{}client_secret.json".format(self.cred_path), self.SCOPES)
            creds = tools.run_flow(flow, store)

        # build and keep the gmail service object once authenticated
        self.gmail = build("gmail",
                           "v1",
                           http=creds.authorize(Http()),
                           cache_discovery=False)

        return self.gmail

    def save_files(self, message):
        """
        Stores the attachments of the given message under self.file_path.

        :calls : GET https://www.googleapis.com/gmail/v1/users/userId/messages/messageId/attachments/id
        :param message : dict

        """
        # local import so this method does not depend on the module's import block
        import os

        self.log.debug("save_file")

        for part in message["payload"].get("parts", ()):

            # The filename comes from the e-mail itself (untrusted), so only
            # its final path component is used to stay below self.file_path.
            filename = os.path.basename(part.get("filename") or "")
            if not filename:
                continue

            body = part.get("body", {})

            if "attachmentId" in body:
                file_dic = (self.gmail.users().messages().attachments().get(
                    userId="me", messageId=message["id"],
                    id=body["attachmentId"]).execute())
                encoded = file_dic["data"]
            else:
                # without an attachmentId the content is carried inline in the part body
                encoded = body.get("data")

            if not encoded:
                continue

            file_data = base64.urlsafe_b64decode(encoded.encode("UTF-8"))
            path = "".join([self.file_path, filename])

            # decoded attachment data is bytes, so the file must be opened in binary mode
            with open(path, "wb") as file_obj:
                file_obj.write(file_data)

            self.log.info("attachment saved to", path=path)

    def set_tmp_ts_to_last_msg(self):
        """

        Resets last_msg_ts to the value saved in tmp_ts and closes the diskdict.

        """
        self.log.debug("set_tmp_ts_to_last_msg")

        self.dd["last_msg_ts"] = self.dd["tmp_ts"]
        self.dd.close()

    def renew_mailbox_watch(self):
        """Renewing mailbox watch

        You must re-call watch() at least every 7 days or else you will stop receiving pub/sub updates.
        We recommend calling watch() once per day. The watch() response also has an
        expiration field with the timestamp for the watch expiration.

        Note : this method never returns; it loops forever, sleeping
        GMAIL_WATCH_DELAY seconds between two watch() requests.

        :ref : https://developers.google.com/gmail/api/guides/push

        """
        while True:
            self.watch_gmail()
            time.sleep(self.GMAIL_WATCH_DELAY)

    def get_new_msg(self):
        """
        Looks for changes in the user's mailbox since the stored historyId and
        stores the newly added (non draft) messages.
        Note : startHistoryId - returns Histories(drafts, mail deletions, new mails) after start_history_id.

        :calls : GET https://www.googleapis.com/gmail/v1/users/userId/history
        :rtype : list of added messages, or None when the response holds no history

        >>> from mock import Mock, MagicMock
        >>> obj = GmailHistory()
        >>> obj.store_msgs_in_db = Mock()
        >>> obj.set_tmp_ts_to_last_msg = Mock()
        >>> obj.gmail = Mock()
        >>> obj.dd = MagicMock()
        >>> sample_doc = {'history': [{'messagesAdded': [{'message': {'labelIds': ['UNREAD'], 'id': '163861dac0f17c61'}}]}]}
        >>> obj.gmail.users().history().list().execute = Mock(obj.gmail.users().history().list().execute,return_value=sample_doc)
        >>> obj.get_new_msg()
        [{'labelIds': ['UNREAD'], 'id': '163861dac0f17c61'}]

        """
        self.log.debug("get_new_msg")

        new_msg = (self.gmail.users().history().list(
            userId="me", startHistoryId=self.dd["historyId"]).execute())

        if "history" not in new_msg:
            return

        msg_list = []
        for record in new_msg["history"]:
            # one history record can carry several added messages; keep them all
            for added in record.get("messagesAdded") or ():
                msg = added["message"]

                # labelIds may be missing and DRAFT is not guaranteed to be
                # the first label, so test membership instead of index 0
                if "DRAFT" in (msg.get("labelIds") or ()):
                    continue

                msg_list.append(msg)

        self.store_msgs_in_db(msg_list)
        self.set_tmp_ts_to_last_msg()

        return msg_list

    def watch_gmail(self):
        """To receive Push Notifications

        In order to receive notifications from Cloud Pub/Sub topic,
        simply we can call watch() from google api client on the Gmail user mail box.
        :ref : https://developers.google.com/gmail/api/guides/push

        :calls : POST https://www.googleapis.com/gmail/v1/users/userId/watch
        :rtype : dict, the watch() response

        >>> from mock import Mock
        >>> obj = GmailHistory()
        >>> obj.gmail = Mock()
        >>> api_doc = {'historyId':1234,'expiration':1526901631234}
        >>> obj.gmail.users().watch().execute = Mock(obj.gmail.users().watch().execute, return_value=api_doc)
        >>> obj.watch_gmail()
        {'expiration': 1526901631234, 'historyId': 1234}

        """
        self.log.debug("watch_gmail")

        request = {
            "labelIds": self.LABELIDS,
            "topicName": "{}".format(self.topic)
        }

        hstry_id = self.gmail.users().watch(userId="me",
                                            body=request).execute()

        self.log.info("Gmail_watch_id :", hstryid=hstry_id)

        return hstry_id

    def send_msgs_to_target(self, target, msg):
        """
        Sends msg to the target database, which stores it.

        :param target : db_obj
        :param msg : dict

        """
        self.log.debug("send msgs to tatgets")

        target.insert_msg(msg)

    def write_message(self, msg):
        """
        Pushes msg to every configured target through the thread pool and waits
        until all of them are done. Each target receives its own deep copy.

        :param msg: dict

        """
        self.log.debug("write msgs in db")

        if not self.targets:
            return

        jobs = [
            self._pool.apply_async(self.send_msgs_to_target, (t, deepcopy(msg)))
            for t in self.targets
        ]

        for j in jobs:
            j.wait()

    def change_diskdict_state(self, message):
        """
        Updates the progress kept in the diskdict after a message was handled.

        :param message : dict

        """

        # for every msg the last_msg_ts will be replaced with the msg internalDate
        self.dd["last_msg_ts"] = message["internalDate"]

        # internalDate is a string of epoch milliseconds; compare numerically so
        # that values with a different number of digits are ordered correctly
        if "frst_msg_ts" not in self.dd.keys() or (
                int(self.dd["frst_msg_ts"]) <= int(message["internalDate"])):
            self.dd["frst_msg_ts"] = message["internalDate"]
            self.dd["historyId"] = message["historyId"]

    def store_msgs_in_db(self, msgs_list):
        """
        Gets msg ids from list of messages, makes an api call for every msg id
        and stores the full message in the db.

        :params msgs_list : list
        :calls : GET https://www.googleapis.com/gmail/v1/users/userId/messages/id

        """
        self.log.debug("store_msgs_in_db")

        for msg in msgs_list:

            message = (self.gmail.users().messages().get(
                userId="me", id=msg["id"]).execute())

            self.write_message(message)
            self.change_diskdict_state(message)

            if self.file_path:
                self.save_files(message)

    def get_default_ts(self):
        """
        Returns tomorrow's date (relative to now) in Y/m/d format

        :rtype: str

        """
        self.log.debug("get_default_ts")

        return (datetime.now() + timedelta(days=1)).strftime("%Y/%m/%d")

    def get_history(self, before, after=GMAIL_CREATED_TS):
        """
        Get all the msgs from the user's mailbox within given dates and store them in the db
        Note : Gmail api will consider 'before' : excluded date, 'after' : included date
        Eg: before : 2017/01/01, after : 2017/01/31 then gmail api gives msgs from 2017/01/02 - 2017/01/31

        :ref : https://developers.google.com/gmail/api/guides/filtering
        :calls : GET https://www.googleapis.com/gmail/v1/users/userId/messages

        :param before : string
        :param after : string
        :rtype : list

        >>> from mock import Mock
        >>> obj = GmailHistory()
        >>> obj.gmail = Mock()
        >>> api_doc = {'messages':[{'id':'163861dac0f17c61'},{'id':'1632163b6a84ab94'}]}
        >>> obj.gmail.users().messages().list().execute = Mock(obj.gmail.users().messages().list().execute, return_value=api_doc)
        >>> obj.store_msgs_in_db = Mock()
        >>> obj.get_history('2017/05/10')
        [{'id': '163861dac0f17c61'}, {'id': '1632163b6a84ab94'}]

        """
        self.log.debug("fun get history")

        query = "{} before:{} after:{}".format(self.query, before, after)
        msgs = []
        page_token = None

        while True:
            params = {
                "userId": "me",
                "maxResults": self.MAX_RESULTS,
                "q": query,
            }
            # the first request is sent without a page token
            if page_token is not None:
                params["pageToken"] = page_token

            response = AttrDict(
                self.gmail.users().messages().list(**params).execute())

            # a page without matches carries no 'messages' key at all
            if "messages" in response:
                msgs.extend(response.messages)
                self.store_msgs_in_db(response.messages)

            if "nextPageToken" not in response:
                break

            page_token = response.nextPageToken

        return msgs

    def get_oldest_date(self, ts):
        """
        Returns the date of the day following the given timestamp.
        Only the first 10 characters of ts (the seconds part) are used.

        :param ts: str (Unix time stamp)
        :rtype: str

        >>> obj=GmailHistory()
        >>> obj.get_oldest_date('1526901630000')
        '2018/05/22'

        """
        self.log.debug("get_oldest_date")

        return (datetime.fromtimestamp(int(ts[:10])) +
                timedelta(days=1)).strftime("%Y/%m/%d")

    def get_latest_date(self, ts):
        """
        Returns the date of the given timestamp.
        Only the first 10 characters of ts (the seconds part) are used.

        :param ts: str (Unix time stamp)
        :rtype: str

        >>> obj=GmailHistory()
        >>> obj.get_latest_date('1526901630000')
        '2018/05/21'

        """
        self.log.debug("get_latest_date")

        return (datetime.fromtimestamp(int(ts[:10]))).strftime("%Y/%m/%d")

    def start(self):
        """
        Fetches the mailbox history in two passes: first everything older than
        the last stored message, then everything newer than the newest stored one.

        """
        self.log.debug("start")

        # Gets next day date from current date as before_ts in 'yr/m/d' format
        # and check for last_msg_ts key in diskdict file
        before_ts = self.get_default_ts()
        last_msg_ts = self.dd.get("last_msg_ts", 0)

        # If any messages present in diskdict, get the last_msg_ts value and replace before_ts var with the
        # last_msg_ts with 'yr/m/d' format
        if last_msg_ts:
            before_ts = self.get_oldest_date(last_msg_ts)

        # Get and store the messages from before_ts date to the time gmail has created
        self.get_history(before_ts)

        # Nothing has ever been stored (e.g. no message matched the query):
        # there is no timestamp to resume from, so there is nothing to recheck.
        if not self.dd.get("last_msg_ts", 0):
            self.log.debug("no messages stored, nothing to recheck")
            return

        self.dd["tmp_ts"] = self.dd["last_msg_ts"]

        # Recheck for any new messages from the time, execution has happened
        after = self.get_latest_date(self.dd["frst_msg_ts"])
        self.get_history(self.get_default_ts(), after)

        # reset last_msg_ts to temp_ts
        self.set_tmp_ts_to_last_msg()
예제 #2
0
class SlackHistory(object):
    """
    Dumps the message history of every public channel, private channel and
    direct-message conversation visible to the given Slack token.

    Every message is enriched (permalink, user name, resolved mentions),
    optionally its uploaded file is downloaded, and the result is forwarded to
    the configured targets. Per-channel progress is kept in a DiskDict under
    the keys '<channel id>_oldest' and '<channel id>_latest'.
    """

    CACHE_LEN = 100  # first argument given to the username ExpiringDict
    CACHE_LIFE_TIME = 500  # second argument given to the username ExpiringDict
    CHUNK_SIZE = 1024  # chunk size in bytes used when writing downloaded files
    WAIT_TIME_TARGET_FAILURE = 2  # time in sec to wait before retrying a failed target

    def __init__(
        self,
        auth_token=None,
        connect_time=None,
        dict_path="/tmp",
        file_path=None,
        targets=None,
        log=DUMMY_LOG,
    ):
        """
        :param auth_token: str, slack api token
        :param connect_time: float, upper bound (unix time) of the history to fetch;
                             defaults to the time this object is created
        :param dict_path: str, directory holding 'disk.dict'
        :param file_path: str, directory for downloaded files (no download when None)
        :param targets: iterable of objects exposing insert_msg(msg)
        :param log: logger object
        """
        self.auth_token = auth_token
        self.slack = SlackClient(self.auth_token)

        # A `time.time()` default in the signature would be evaluated only once,
        # when the class is defined, so it is resolved per instance here.
        self.connect_time = time.time() if connect_time is None else connect_time

        self.targets = targets
        self.file_path = file_path
        self.dd = DiskDict(dict_path + "/disk.dict")

        self.log = log
        self.username_cache = ExpiringDict(self.CACHE_LEN, self.CACHE_LIFE_TIME)
        self._pool = ThreadPool()

    def get_username(self, _id):
        """
        Returns the user name for a slack user id, using the expiring cache.

        >>> from mock import Mock
        >>> ob = SlackHistory()
        >>> ob.slack.api_call = Mock(ob.slack.api_call, return_value={"user":{"name":"asdf"}})
        >>> ob.get_username(677)
        'asdf'
        """
        cache = self.username_cache

        # The cache must be keyed by the user id itself; a fixed key would make
        # every user share (and overwrite) a single entry.
        if _id in cache:
            return cache[_id]

        user_info = self.slack.api_call("users.info", user=_id)
        user_name = user_info["user"]["name"]
        cache[_id] = user_name

        return user_name

    def get_permalink(self, channel_id, ts):
        """
        Returns the permalink of a message, or False when the api response has none.

        >>> from mock import Mock
        >>> ob = SlackHistory()
        >>> ob.slack.api_call = Mock(ob.slack.api_call,
        ...     return_value={"permalink":'https://justadummy.com'})
        >>> ob.get_permalink(677, 123545)
        'https://justadummy.com'
        >>> ob.slack.api_call = Mock(ob.slack.api_call,
        ...     return_value={"error":"No link found"})
        >>> ob.get_permalink(677, 123545)
        False
        """
        link = self.slack.api_call(
            "chat.getPermalink", channel=channel_id, message_ts=ts
        )
        if "permalink" in link:
            return link["permalink"]

        return False

    def get_file(self, url, filename):
        """
        Downloads url (authorized with the slack token) into self.file_path/filename.

        :param url: str
        :param filename: str
        """
        headers = {"Authorization": "Bearer " + self.auth_token}
        r = requests.get(url, headers=headers)
        with open(os.path.join(self.file_path, filename), "wb") as f:
            for chunk in r.iter_content(self.CHUNK_SIZE):
                if chunk:
                    f.write(chunk)

    def replace_text(self, text):
        """
        Replaces user mentions ('<@id>' or '<@id|name>') with '@username'.

        >>> from mock import Mock
        >>> ob = SlackHistory()
        >>> ob.get_username = Mock(ob.get_username, return_value="asdf")
        >>> ob.replace_text('<@677> hii')
        '@asdf hii'
        """
        self.log.debug("replacing text")
        words = text.split(" ")
        for index, word in enumerate(words):
            if "<@" not in word:
                continue

            # keep what stands between '<@' and '>' and drop an optional '|label'
            user_id = word.split("<@")[1].split(">")[0].split("|")[0]
            words[index] = "@" + self.get_username(user_id)

        return " ".join(words)

    def get_key(self, channel, timestamp):
        """
        Returns the sha1 hex digest of channel + timestamp, used as message key.

        >>> ob = SlackHistory()
        >>> ob.get_key("U8S2NSX6J", "1518582605.000239")
        '18a920561dec17877db2a4628c13734e2f63df1d'
        """
        return hashlib.sha1((channel + timestamp).encode("utf8")).hexdigest()

    def parse_dict(self, msg):
        """
        Enriches a message with its permalink and user name, resolves mentions
        in its text and downloads the uploaded file when file_path is set.

        >>> from mock import Mock
        >>> ob = SlackHistory()
        >>> ob.get_permalink = Mock(ob.get_permalink, return_value="http://justadummy.com")
        >>> ob.replace_text = Mock(ob.replace_text, return_value="uploaded a file @asdf")
        >>> ob.get_username = Mock(ob.get_username, return_value="justaname")
        >>> ob.get_file = Mock()
        >>> ob.parse_dict(AttrDict({'channel': 'abcd', 'user': '******', 'text': 'uploaded a file <@123>', 'ts': '1518582605.000239'}))
        AttrDict({'permalink': 'http://justadummy.com', 'text': 'uploaded a file @asdf', 'ts': '1518582605.000239', 'user': '******', 'user_name': 'justaname', 'channel': 'abcd'})
        """
        p = self.get_permalink(msg.channel, msg.ts)
        if p:
            msg.permalink = p

        if "user" in msg:
            msg.user_name = self.get_username(msg.user)

        has_text = "text" in msg

        if has_text and "<@" in msg.text:
            msg.text = self.replace_text(msg.text)

        # Not every message has a 'text' or a 'file' entry, so both are
        # checked before they are read.
        is_upload = has_text and "uploaded a file" in msg.text
        if is_upload and self.file_path is not None and "file" in msg:
            filename = str(msg.ts) + "_" + msg.file.name.replace(" ", "_")
            self.get_file(msg.file.url_private_download, filename)

        return msg

    def change_status(self, _id, ts):
        """
        Records ts as the oldest and/or latest handled timestamp of channel _id.

        :param _id: str, channel id
        :param ts: str, message timestamp
        """
        oldest_key = _id + "_oldest"
        latest_key = _id + "_latest"
        known = self.dd.keys()

        if oldest_key not in known or ts < self.dd[oldest_key]:
            self.dd[oldest_key] = ts

        # only move 'latest' forward; older pages are processed after newer
        # ones and must not pull the marker back
        if latest_key not in known or ts > self.dd[latest_key]:
            self.dd[latest_key] = ts

    def _send_msgs_to_target(self, target, msg):
        """
        Inserts msg into target, retrying every WAIT_TIME_TARGET_FAILURE
        seconds until the insert succeeds.
        """
        # the key only depends on the message, so it is computed once
        msg["key"] = self.get_key(msg["channel"], msg["ts"])

        while 1:
            try:
                target.insert_msg(msg)
                self.change_status(msg["channel"], msg["ts"])
                break
            except (SystemExit, KeyboardInterrupt):
                raise
            except Exception:
                self.log.exception("_send_msgs_to_target_failed", target=target)
                time.sleep(self.WAIT_TIME_TARGET_FAILURE)

    def _write_messages(self, msg):
        """
        Sends a deep copy of msg to every target through the thread pool and
        waits for all of them.
        """
        if not self.targets:
            return

        jobs = [
            self._pool.apply_async(self._send_msgs_to_target, (t, deepcopy(msg)))
            for t in self.targets
        ]

        for j in jobs:
            j.wait()

    def get_history(self, slack, _id, _name, end_ts, start_ts=0):
        """
        Fetches the messages of one channel between start_ts and end_ts,
        page by page, and writes them to the targets.

        :param slack: str, history api method (eg: 'channels.history')
        :param _id: str, channel id
        :param _name: str, channel name
        :param end_ts: upper bound of the time range
        :param start_ts: lower bound of the time range
        :rtype: int, number of messages handled

        >>> from mock import Mock
        >>> ob = SlackHistory()
        >>> ob.slack.api_call = Mock(ob.slack.api_call)
        >>> ob.slack.api_call.side_effect= [{'messages' :[{'message': 'Dummy <@123>', 'ts': '123.234'}], 'has_more': True}, {'messages' :[{'message': 'Dummy <@123>', 'ts': '122.234'}], 'has_more': False}]
        >>> ob.parse_dict = Mock(ob.parse_dict, return_value={'message': 'Dummy @asdf', 'ts': '123.234'})
        >>> ob._write_messages = Mock()
        >>> ob.get_history('users.info', '1234', 'general', 12345)
        2
        """
        ts = end_ts
        num = 0
        while True:
            response = self.slack.api_call(
                slack, channel=_id, latest=ts, oldest=start_ts, count=1000
            )
            if "messages" not in response:
                return num

            # handle every page in chronological order
            page = sorted(response["messages"], key=itemgetter("ts"))
            for message in page:
                msg = AttrDict(message)
                msg.channel = _id
                msg.channel_name = _name

                msg = self.parse_dict(msg)

                self._write_messages(dict(msg))

            num += len(page)

            if not response.get("has_more") or not page:
                return num

            # The next (older) page must end right before the OLDEST message
            # of this page; the page is sorted ascending, so that is page[0].
            ts = page[0]["ts"]

    def get_channel_status(self, _type, _id, name):
        """
        Fetches the history of one channel: first everything older than the
        oldest stored message, then everything newer than the latest stored one.

        :param _type: str, history api method
        :param _id: str, channel id
        :param name: str, channel name
        """
        known = self.dd.keys()

        if _id + "_oldest" not in known:
            last_ts = self.connect_time
        else:
            last_ts = self.dd[_id + "_oldest"]

        num = self.get_history(_type, _id, name, last_ts)

        # looked up again: the call above may have created the key
        if _id + "_latest" not in self.dd.keys():
            latest_ts = 0
        else:
            latest_ts = self.dd[_id + "_latest"]

        if latest_ts != self.connect_time:
            num = num + self.get_history(_type, _id, name, self.connect_time, latest_ts)

        self.log.info(
            "finished_inserting_channel_data",
            channel=_id,
            channel_name=name,
            num_msg=num,
            type="metric",
        )

        time.sleep(1)

    def get_public_channels_list(self):
        """Fetches the history of every channel returned by 'channels.list'."""
        _type = "channels.history"
        public_channels = self.slack.api_call("channels.list")["channels"]
        for channel in public_channels:
            self.log.info("fetching_public_channel_message", channel=channel["name"])
            self.get_channel_status(_type, channel["id"], channel["name"])

    def get_private_channels_list(self):
        """Fetches the history of every group returned by 'groups.list'."""
        _type = "groups.history"
        private_channels = self.slack.api_call("groups.list")["groups"]
        for channel in private_channels:
            self.log.info("fetching_private_channel_message", channel=channel["name"])

            self.get_channel_status(_type, channel["id"], channel["name"])

    def get_direct_channels_list(self):
        """Fetches the history of every conversation returned by 'im.list'."""
        _type = "im.history"
        direct_channels = self.slack.api_call("im.list")["ims"]
        for channel in direct_channels:
            self.log.info("fetching_direct_channel_message", channel=channel["user"])
            self.get_channel_status(_type, channel["id"], channel["user"])

    def start(self):
        """Fetches public channels, then private channels, then direct messages."""
        self.get_public_channels_list()
        self.get_private_channels_list()
        self.get_direct_channels_list()
예제 #3
0
class GithubHistory(object):

    """
    This is the main class you instantiate to access the Github API v3 and store all msgs in the db.

    It walks repos -> issues -> comments and sends one merged record per
    repo / issue / comment to the configured targets.

    """

    def __init__(
        self,
        auth_token=None,
        repos=None,
        status_path="/tmp/",
        targets=None,
        log=DUMMY_LOG,
    ):
        """
        :param auth_token: str, github api token
        :param repos: str, comma separated repo full names (eg: 'org/repo1,org/repo2');
                      when empty the repos of the authenticated user are used
        :param status_path: str, prefix prepended to 'disk.dict'
        :param targets: list of objects exposing insert_msg(msg); the first one
                        is also used as store (see start())
        :param log: logger object

        """

        self.git = Github(auth_token)
        self.log = log
        self.repos = repos
        self.targets = targets
        self.store = None  # set by start()
        self.dd = DiskDict(status_path + "disk.dict")
        self._pool = ThreadPool()

    def get_repo_obj(self, repo_fullname):
        """
        :params repo_fullname : string
        :rtype: :class:`github.Repository.Repository`

        >>> obj = GithubHistory()
        >>> obj.get_repo_obj('org/reponame')
        Repository(full_name=None)

        """
        self.log.debug("fun : get repo obj")

        return self.git.get_repo(str(repo_fullname))

    def get_repos_list(self):
        """
        :rtype: :class:`github.PaginatedList.PaginatedList` of :class:`github.Repository.Repository`
        or
        :rtype: [class:`github.Repository.Repository`,..]

        >>> from mock import Mock
        >>> obj = GithubHistory()
        >>> m=Mock(obj.repos)
        >>> obj.repos=m.return_value='org/repo1,org/repo2'
        >>> obj.get_repos_list()
        [Repository(full_name=None), Repository(full_name=None)]

        """
        self.log.debug("fun : get repos list")

        if self.repos:
            # tolerate blanks around the commas and ignore empty entries
            names = [name.strip() for name in self.repos.split(",")]
            return [self.get_repo_obj(name) for name in names if name]

        return self.git.get_user().get_repos()

    def get_raw_data(self, obj):
        """
        Gets raw json from the obj (the '_rawData' entry of its __dict__),
        or None when the obj has no such entry.

        :param obj: class github
        :rtype: dict

        >>> obj = GithubHistory()
        >>> class test(object):
        ...    __dict__ = {'_rawData':{'id':'123456'}}
        ...
        >>> obj.get_raw_data(test())
        {'id': '123456'}

        """
        self.log.debug("fun : get raw data")

        # plain key lookup; comparing strings with `is` tests identity, not equality
        return obj.__dict__.get("_rawData")

    def merge_dict(self, *args):
        """
        Merges the given dicts into a new one; on duplicate keys the
        earliest dict wins.

        :params args: dict
        :rtype: dict

        >>> obj = GithubHistory()
        >>> obj.merge_dict({'a':1,'b':2},{'c':3,'d':4})
        {'a': 1, 'c': 3, 'b': 2, 'd': 4}

        """
        self.log.debug("fun : chain dict or merge dict")

        return dict(ChainMap(*args))

    def get_key(self, record):
        """
        Builds the record id from the 'updated_at' values of its repository,
        issue and comment parts (0 is used for a missing value).

        :params record: dict
        :rtype: dict

        >>> obj = GithubHistory()
        >>> obj.get_key({'repository':{'updated_at':'21-04-14'},'issue':{'updated_at':'21-04-14'},'comment':{'updated_at':'21-04-14'}})
        {'id': '1c4882d4c922bcfdc070de97de03706c9276f8eb'}
        >>> obj.get_key({'repository':{'updated_at':'21-04-14'},'issue':{},'comment':{}})
        {'id': '8acfc9c43a5c9f64ee2070007591811f4048c907'}

        """
        self.log.debug("fun : get hash key")

        key = "%s%s%s" % (
            record.get("repository", {}).get("updated_at", 0),
            record.get("issue", {}).get("updated_at", 0),
            record.get("comment", {}).get("updated_at", 0),
        )

        # TODO: Need to add repo id
        # hashlib works on bytes, so the text key is encoded first
        return {"id": hashlib.sha1(key.encode("utf8")).hexdigest()}

    def send_msgs_to_target(self, target, msg):
        """
        :param target: db obj
        :param msg: dict

        """
        self.log.debug("send msgs to tatgets")

        target.insert_msg(msg)

    def write_message(self, msg):
        """
        Sends a deep copy of msg to every target through the thread pool and
        waits for all of them.

        :param msg: dict

        """
        self.log.debug("write msgs in db")

        if not self.targets:
            return

        jobs = [
            self._pool.apply_async(self.send_msgs_to_target, (t, deepcopy(msg)))
            for t in self.targets
        ]

        for j in jobs:
            j.wait()

    def store_record(self, repo, issue=None, comment=None):
        """
        Merges the repo, issue and comment data into one record, adds its id
        and writes it to the targets. A comment is only used together with an issue.

        :param repo:    class 'github.Repository.Repository'
        :param issue:   class 'github.Issue.Issue'
        :param comment: class 'github.IssueComment.IssueComment'
        :rtype: dict

        >>> obj = GithubHistory()
        >>> class repo(object):
        ...      __dict__ = { '_rawData' : { 'id' : 1234 }}
        ...      class owner(object):
        ...          type = 'user'
        ...
        >>> class issue(object):
        ...      __dict__ = {'_rawData':{'id':5678}}
        ...
        >>> class comment(object):
        ...      __dict__ = {'_rawData':{'id':91011}}
        ...
        >>> obj.store_record(repo(), issue(), comment())
        {'comment': {'id': 91011}, 'issue': {'id': 5678}, 'id': '8aefb06c426e07a0a671a1e2488b4858d694a730', 'repository': {'id': 1234}}

        """
        self.log.debug("fun : store record")

        iss, cmnt = {}, {}
        rp = self.get_repo_dict(repo)

        if issue:
            iss = self.get_issue_dict(issue)

            if comment:
                cmnt = self.get_comment_dict(comment)

        record = self.merge_dict(rp, iss, cmnt)
        record.update(self.get_key(record))

        self.write_message(record)
        return record

    def get_repo_dict(self, repo):
        """
        :param repo: class 'github.Repository.Repository'
        :rtype: dict

        >>> from mock import Mock
        >>> class repo(object):
        ...     __dict__ = {'_rawData':{'id':'12345', 'name':'abcd'}}
        ...     class owner(object):
        ...         type = 'Organization'
        ...         login = '******'
        ...
        >>> class org(object):
        ...     __dict__ = {'_rawData':{'id':'12345'}}
        ...
        >>> obj=GithubHistory()
        >>> obj.git.get_organization = Mock(obj.git.get_organization,return_value=org())
        >>> obj.get_repo_dict(repo())
        {'organization': {'id': '12345'}, 'repository': {'id': '12345', 'name': 'abcd'}}

        """
        self.log.debug("fun : get repo dict")

        org_dict = {}

        # organization details are only fetched for organization owned repos
        if repo.owner.type == "Organization":
            org = self.git.get_organization(str(repo.owner.login))
            org_dict = {"organization": self.get_raw_data(org)}

        repo_dict = {"repository": self.get_raw_data(repo)}
        return self.merge_dict(org_dict, repo_dict)

    def get_issue_dict(self, issue):
        """
        :param issue: class 'github.Issue.Issue'
        :rtype: dict

        >>> from mock import Mock
        >>> obj = GithubHistory()
        >>> class issue(object):
        ...      __dict__ = {'_rawData':{'id':123456}}
        ...
        >>> obj.get_issue_dict(issue())
        {'issue': {'id': 123456}}

        """
        self.log.debug("fun : get issue dict")

        return {"issue": self.get_raw_data(issue)}

    def get_time(self, _time):
        """
        Parses an api time string and returns it shifted by one second.

        :param _time: string
        :rtype: datetime.datetime

        >>> obj = GithubHistory()
        >>> obj.get_time('2018-02-15T09:17:49Z')
        datetime.datetime(2018, 2, 15, 9, 17, 50)

        """
        self.log.debug("fun : get api time format")

        return datetime.strptime(_time, "%Y-%m-%dT%H:%M:%SZ") + timedelta(seconds=1)

    def get_comment_dict(self, cmnt):
        """
        :param cmnt:class 'github.IssueComment.IssueComment'
        :rtype: dict

        >>> from mock import Mock
        >>> obj = GithubHistory()
        >>> class comment(object):
        ...      __dict__ = {'_rawData':{'id':123456}}
        ...
        >>> obj.get_comment_dict(comment())
        {'comment': {'id': 123456}}

        """
        self.log.debug("fun : get comment dict")

        return {"comment": self.get_raw_data(cmnt)}

    def check_rate_limit(self):
        """
        Checks no of api calls remaining before going to any function;
        if fewer than 100 calls remain, waits until one minute after the
        rate limit reset time.

        """
        self.log.debug("fun :check api rate limit")

        remaining, _total = self.git.rate_limiting

        # 0 and 1 remaining calls must wait too. Negative values are skipped
        # (presumably "not known yet" before the first request - TODO confirm).
        if 0 <= remaining < 100:
            expiry = self.git.rate_limiting_resettime
            # never hand a negative duration to time.sleep()
            delay = max(0, (expiry - time.time()) + 60)
            self.log.info("waiting for " + str(delay) + " sec")
            time.sleep(delay)

    def get_comments(self, repo, issue, changes=None):
        """
        Get comments related to the issue and store them, followed by the
        issue record itself.

        :param repo: class 'github.Repository.Repository'
        :param issue: class 'github.Issue.Issue'
        :param changes: string, eg: '2018-02-15T09:17:49Z'

        """
        self.log.debug("fun : get comments")

        self.check_rate_limit()

        if changes:
            # In case there are changes in issue, start from that time
            last_time = self.get_time(changes)
        else:
            # otherwise start from the issue creation time
            iss_dict = self.get_raw_data(issue)
            last_time = self.get_time(iss_dict["created_at"])

        # get and store the comments since issue created or last issue comment updated time(in case of new comments)
        for comment in issue.get_comments(since=last_time):
            self.store_record(repo, issue, comment)

        self.store_record(repo, issue)

    def get_issues(self, repo):
        """
        Get issues related to the input Repository

        :param repo: class 'github.Repository.Repository'

        """
        self.log.debug("fun : get issues")

        self.check_rate_limit()
        for issue in repo.get_issues():

            # getting issue dict from issue obj as iss
            iss = self.get_issue_dict(issue)

            # passing the issue dict and checking in db for issue related records and changes
            # returns (count as 0 or 1),(changes as 0 or time in '2018-02-15T09:17:49Z' format)
            count, changes = self.store.check_issue_in_db(iss)

            # if no records and no changes found related to issue in db, then get all the comments
            if count == 0:
                self.get_comments(repo, issue)
                continue

            # if records present in db, but the current issue updated time not matched with the last comment
            # updated time in db, so get the msgs from last comment updated time.
            if changes:
                self.get_comments(repo, issue, changes)

    def get_history(self):
        """
        Get user's account repos and from that iterate over issues and comments.
        get_history
           -> repos -> issues -> comments

        """
        self.log.debug("fun : get history")

        for repo in self.get_repos_list():

            # if repo doesn't contain any issues just store the record
            if repo.open_issues == 0:
                self.store_record(repo)
                continue

            # get the issues related to the repo
            self.get_issues(repo)

            # store the status in disk dict
            self.dd["repository"] = repo.full_name

    def start(self):
        """
        Runs the history fetch; when the diskdict has no 'repository' entry
        yet, the history is fetched once more to recheck for new messages.

        """
        self.log.debug("fun : start")

        # create db obj at 0th index from target
        self.store = self.targets[0]

        if "repository" not in self.dd.keys():
            self.get_history()

        # recheck for new messages
        self.get_history()

        self.log.info("Messages stored successfully")