Esempio n. 1
0
def save(self, gr_id, gr_info):
    """Create or update the redis hash for group *gr_id*.

    New groups are stored with state "active"; for existing groups the
    record is rewritten only when st_diff reports a difference.

    :param gr_id: group identifier (coercible to int/str).
    :param gr_info: dict of group metadata; mutated in place ("state").
    """

    # Generate pseudo-key-id
    __g_id = "g_" + str(gr_id)

    # Check if group exists at db
    if len(self.rd_instance_us.keys(__g_id)) == 0:

        # Save group and mark to active (reuse the key built above
        # instead of rebuilding the same string).
        gr_info["state"] = "active"
        self.rd_instance_us.hmset(__g_id, gr_info)

        # Print alert
        if config.DEBUGGER:
            config.print_message("- Added Group: %d" % int(gr_id))

    # Group exists at redis
    else:

        # Get info at redis
        gr_rd = self.rd_instance_us.hgetall(__g_id)

        # Detect different information from two groups
        __new_group = st_diff.groups(gr_info, gr_rd)

        if __new_group is not None:

            # Generate new group
            self.rd_instance_us.hmset(__g_id, gr_info)

            # Print alert
            if config.DEBUGGER:
                config.print_message("- Updated Group: %d" % int(gr_id))
Esempio n. 2
0
def save(self, us_id, us_info):
    """Create or update the redis hash for user *us_id*.

    New users are stored with state "active"; for existing users the
    record is rewritten only when st_diff reports a difference.

    :param us_id: user identifier (coercible to int/str).
    :param us_info: dict of user metadata; mutated in place ("state").
    """

    # Generate pseudo-key-id
    __u_id = "u_" + str(us_id)

    # Check if user exists at non gitlab users
    if len(self.rd_instance_us.keys(__u_id)) == 0:

        # Save user and mark to active (reuse the key built above).
        us_info["state"] = "active"
        self.rd_instance_us.hmset(__u_id, us_info)

        # Print alert
        if config.DEBUGGER:
            config.print_message("- Added User: %d" % int(us_id))

    # User exists at redis
    else:

        # Get info at redis
        us_rd = self.rd_instance_us.hgetall(__u_id)

        # Detect different information from two users
        __new_user = st_diff.users(us_info, us_rd)

        if __new_user is not None:

            # Generate new user.  The original overwrote the diff result
            # with us_info before storing it, so we store us_info directly.
            self.rd_instance_us.hmset(__u_id, us_info)

            # Print alert
            if config.DEBUGGER:
                config.print_message("- Updated User: %d" % int(us_id))
Esempio n. 3
0
    def update_information(self, update):
        """Synchronise one entity family between Gitlab and redis.

        :param update: entity family to refresh; one of "users",
            "groups" or "projects".  Any other value only prints the
            debug summary and performs no work (as before).
        """

        __mt_gl = sniff.get_keys_and_values_from_gitlab(self, update)
        __mt_rd_id = sniff.get_keys_from_redis(self, update)
        __mt_gl_id = __mt_gl.keys()

        # Generate difference and intersection metadata: every key seen
        # at gitlab is an insert/update candidate, keys only at redis
        # must be deleted.
        __mt_diff = set(__mt_gl_id).difference(set(__mt_rd_id))
        __mt_int = set(__mt_gl_id).intersection(set(__mt_rd_id))
        __mt_mod = list(__mt_diff.union(__mt_int))
        __mt_del = list(set(__mt_rd_id).difference(set(__mt_gl_id)))

        # Print alert
        if config.DEBUGGER:
            config.print_message("- [ %s ] New or possible updates: %d | Deleted: %d" %
                                 (update, len(__mt_mod), len(__mt_del)))

        # Dispatch table replaces the duplicated if/elif chains.
        __handlers = {
            "users": (util_user.save, util_user.delete),
            "groups": (util_group.save, util_group.delete),
            "projects": (util_project.save, util_project.delete),
        }
        __handler = __handlers.get(update)
        if __handler is None:
            return
        __save_fn, __delete_fn = __handler

        # Insert / Modify Information
        for i in __mt_mod:
            __save_fn(self, i, __mt_gl[i])

        # Delete Information
        for i in __mt_del:
            __delete_fn(self, i)
def generate_pycode():
    """Copy the generated template into the final generated/glapi.py module."""
    settings.print_message(" - Generating Python code ... ")
    # "with" guarantees both handles are closed even if an I/O error occurs
    # (the original left files open on exception).
    with open("generated/template.tmp", "r") as src:
        t_file = src.read()
    with open("generated/glapi.py", "w") as dst:
        dst.write(t_file)
def generate_doc(branch):
    """Remove any previously generated docs and rebuild them for *branch*."""
    doc_path = settings.GEN_DOC_DISK_PATH
    # Wipe whatever is at the target path, file or directory alike.
    if os.path.exists(doc_path):
        if os.path.isdir(doc_path):
            shutil.rmtree(doc_path, True)
        else:
            os.remove(doc_path)
    settings.print_message(" - Generating branch: %s." % branch)
    # NOTE(review): branch is only echoed; generate.rb receives no
    # arguments — confirm the script picks the branch up elsewhere.
    call(["./gitlab-docs/generate.rb"])
def generate_pypi_settings():
    """Render the pypi template with credentials and install it as ~/.pypirc."""
    settings.print_message(" - Generating pypi config ... ")
    # "with" guarantees the handles are closed even on I/O errors.
    with open("generated/pypi.tmp", "r") as template:
        p_settings = template.read()
    # Substitute credential placeholders from settings.
    p_settings = p_settings.replace("PYPI_USERNAME", settings.PYPI_USER)
    p_settings = p_settings.replace("PYPI_PASSWORD", settings.PYPI_PASS)
    with open(os.path.join(os.path.expanduser("~"), ".pypirc"), "w") as out:
        out.write(p_settings)
def generate_settings(version):
    """Render generated/settings.py from its template for *version*.

    :param version: version string; '-' separators are normalised to '.'.
    """
    __version = str(version).replace("-", ".")
    settings.print_message(" - Generating settings.py: %s ... " % __version)
    # "with" guarantees the handles are closed even on I/O errors.
    with open("generated/settings.tmp", "r") as template:
        t_settings = template.read()
    # ".8" suffix is the template's patch-level convention.
    t_settings = t_settings.replace("API_VERSION_TEMPLATE", __version + ".8")
    with open("generated/settings.py", "w") as out:
        out.write(t_settings)
Esempio n. 8
0
def _search_scholar_soup(soup,
                         max_papers_count,
                         total_papers,
                         start_paper,
                         skip_endnote=False,
                         print_level=0):
    """Generator that returns pub information dictionaries from the search page

    Walks the result pages of a scholar query, yielding one parsed paper
    dict per result until max_papers_count papers have been yielded or no
    "next page" link remains.
    NOTE(review): start_paper is never referenced in this body — confirm
    callers still need it.
    """
    page_num = 1
    counter = 0
    while True:
        # Result blocks carry both the "gs_r" and "gs_or" css classes.
        paper_blocks = soup.find_all('div', class_=lambda css_class: \
            ("gs_r" in css_class and "gs_or" in css_class) if css_class else False)
        page_total = len(paper_blocks)
        logger.debug(
            "Find papers on page #{0} (google_max_papers = {1})".format(
                page_num, max_papers_count))
        logger.debug("Total %i papers on page." % (page_total))
        for page_counter, paper in enumerate(paper_blocks):
            # Stop once the global paper budget is exhausted.
            if counter >= max_papers_count:
                break
            counter += 1
            if print_level >= 0:
                settings.print_message(
                    "Process paper #{} (total {})".format(
                        counter, total_papers), print_level)
            logger.debug("Process paper #{} (total {})".format(
                counter, total_papers))
            logger.debug(
                "Parse html and get info about paper #{0} on searching page (total {1} on page)"
                .format(page_counter + 1, page_total))
            yield _get_info_from_resulting_selection(paper, skip_endnote,
                                                     print_level)
        # Follow pagination only while more papers are still wanted.
        if soup.find(class_='gs_ico gs_ico_nav_next'
                     ) and counter < max_papers_count:
            url = soup.find(
                class_='gs_ico gs_ico_nav_next').parent['href'].strip()
            result = True
            soup = None
            logger.debug("Load next page in resulting query selection.")
            # Effectively a single fetch attempt: on failure result is set
            # to None, which ends this loop (interactive retry disabled).
            while result and soup is None:
                soup = utils.get_soup(_FULLURL.format(_HOST, url))
                if soup is None:
                    result = None
                #    while result is None:
                #        result = input('Do not load new page on scholar. Try again? [Y/N]').lower()
                #        if result == "y": result = True
                #        elif result == "n": result = False
            if soup is None:
                logger.debug(
                    "Soup from google.scholar is None. Break from paper generator loop."
                )
                break
            page_num += 1
        else:
            break
def save(self, pr_id, pr_info):
    """Create or update the redis hash for project *pr_id* and sync its code.

    Side effects: clones/pulls the repository on disk (save_fs), rewrites
    pr_info in place (owner/tags/state, drops namespace/archived) and
    finally refreshes the project's branches and commits (save_code).

    :param pr_id: project identifier (coercible to int/str).
    :param pr_info: dict of project metadata from the Gitlab API.
    """

    # Save project at fs if it is necessary
    save_fs(pr_info)

    # Generate pseudo-key-id
    __p_id = "p_" + str(pr_id)

    # Get project's owner from metadata: projects without an explicit
    # owner belong to their namespace group.
    if pr_info.get("owner") is None:
        pr_info["owner"] = "g_" + str(pr_info.get("namespace").get("id"))
    else:
        pr_info["owner"] = "u_" + str(pr_info.get("owner").get("id"))
    del pr_info["namespace"]

    # Get project's tags from Gitlab API.  Materialise with list() so the
    # stored value is a real sequence on Python 3 too, where map() is lazy
    # (on Python 2 this is a no-op copy).
    pr_info['tags'] = list(map(
        lambda x: x.get("name").encode("ascii", "ignore"),
        self.gl_instance.get_projects_repository_tags_byId(id=pr_id)
    ))

    # Generate state (boolean)
    pr_info['state'] = 'archived' if pr_info['archived'] == 'true' else 'active'
    del pr_info['archived']

    # Check if project exists at db
    if len(self.rd_instance_pr.keys(__p_id)) == 0:

        # Save project
        self.rd_instance_pr.hmset(__p_id, pr_info)

        # Print alert
        if config.DEBUGGER:
            config.print_message("- Added Project: %d" % int(pr_id))

    # Project exists at redis
    else:

        # Get info at redis
        pr_rd = self.rd_instance_pr.hgetall(__p_id)

        # Detect different information from two projects
        __new_project = st_diff.projects(pr_info, pr_rd)

        if __new_project is not None:

            # Generate new project
            self.rd_instance_pr.hmset(__p_id, pr_info)

            # Print alert
            if config.DEBUGGER:
                config.print_message("- Updated Project: %d" % int(pr_id))

    # Project has changes at branches, commits, metadata ...
    save_code(self, pr_id, pr_info.get("name"))
Esempio n. 10
0
def save(self, pr_id, pr_info):
    """Create or update the redis hash for project *pr_id* and sync its code.

    Side effects: clones/pulls the repository on disk (save_fs), rewrites
    pr_info in place (owner/tags/state, drops namespace/archived) and
    finally refreshes the project's branches and commits (save_code).

    :param pr_id: project identifier (coercible to int/str).
    :param pr_info: dict of project metadata from the Gitlab API.
    """

    # Save project at fs if it is necessary
    save_fs(pr_info)

    # Generate pseudo-key-id
    __p_id = "p_" + str(pr_id)

    # Get project's owner from metadata: projects without an explicit
    # owner belong to their namespace group.
    if pr_info.get("owner") is None:
        pr_info["owner"] = "g_" + str(pr_info.get("namespace").get("id"))
    else:
        pr_info["owner"] = "u_" + str(pr_info.get("owner").get("id"))
    del pr_info["namespace"]

    # Get project's tags from Gitlab API.  Materialise with list() so the
    # stored value is a real sequence on Python 3 too, where map() is lazy
    # (on Python 2 this is a no-op copy).
    pr_info['tags'] = list(map(
        lambda x: x.get("name").encode("ascii", "ignore"),
        self.gl_instance.get_projects_repository_tags_byId(id=pr_id)))

    # Generate state (boolean)
    pr_info[
        'state'] = 'archived' if pr_info['archived'] == 'true' else 'active'
    del pr_info['archived']

    # Check if project exists at db
    if len(self.rd_instance_pr.keys(__p_id)) == 0:

        # Save project
        self.rd_instance_pr.hmset(__p_id, pr_info)

        # Print alert
        if config.DEBUGGER:
            config.print_message("- Added Project: %d" % int(pr_id))

    # Project exists at redis
    else:

        # Get info at redis
        pr_rd = self.rd_instance_pr.hgetall(__p_id)

        # Detect different information from two projects
        __new_project = st_diff.projects(pr_info, pr_rd)

        if __new_project is not None:

            # Generate new project
            self.rd_instance_pr.hmset(__p_id, pr_info)

            # Print alert
            if config.DEBUGGER:
                config.print_message("- Updated Project: %d" % int(pr_id))

    # Project has changes at branches, commits, metadata ...
    save_code(self, pr_id, pr_info.get("name"))
def generate_meta_code(file_dir):
    """Build a metadata dict from every html doc file in *file_dir*.

    :param file_dir: directory whose entries are fed to
        generate_code_from_file.
    :returns: dict keyed by generated code id; duplicate keys across
        files are reported and the first occurrence wins.
    """
    md = {}
    settings.print_message(" - Generating metadata from html docs ... ")
    for i in os.listdir(file_dir):
        gen_code = generate_code_from_file(i, file_dir + "/" + i)
        for j in gen_code:
            # Membership test on the dict itself — no .keys() view needed.
            if j in md:
                settings.print_message(" * Duplicated at [" + i + "]: " + md[j].get("string"))
            else:
                md[j] = gen_code[j]
    return md
Esempio n. 12
0
def get_pdf(rg_paper_id, filename):
    """Load pdf for paper with rg_paper_id and save to file filename.

    :returns: the downloader's result, or False when no URL is known or
        the download raises.
    """
    url = get_pdf_url(rg_paper_id)
    if url is None:
        return False
    try:
        settings.print_message("\tDownload pdf...")
        return utils.download_file(url, filename)
    except BaseException:
        # Log the traceback but report a plain failure to the caller.
        logger.warn(traceback.format_exc())
        return False
    # (unreachable "return True" after try/except removed)
Esempio n. 13
0
def processFullDocument(pdf_file_name):
    """Send the whole PDF to the grobid service and return its raw response.

    :param pdf_file_name: path of the PDF to process.
    :returns: grobid response data, or None on an empty response.
    """
    settings.print_message("Send to grobid service.", 2)
    # "with" closes the PDF handle deterministically — the original
    # passed an open() result that was never closed.
    with open(pdf_file_name, 'rb') as pdf_file:
        data = get_data_from_grobid(GROBID_PROCESSED_FULL_TEXT_COMMAND,
                                    pdf_file)
    settings.print_message("Check data.", 2)
    logger.debug("Check data.")
    if not data:
        logger.debug(
            "Server returned empty response (File processing failed), skip.")
        return None
    logger.debug("Convert completed!")
    return data
Esempio n. 14
0
def delete(self, us_id):
    """Remove the redis entry for user *us_id* (no-op if absent)."""

    # Pseudo-key-id under which the user is stored
    __u_id = "u_" + str(us_id)

    # Guard clause: nothing to do when the key is unknown
    if not self.rd_instance_us.keys(__u_id):
        return

    # Drop the hash from the db
    self.rd_instance_us.delete(__u_id)

    # Print alert
    if config.DEBUGGER:
        config.print_message("- Removed User %d" % int(us_id))
Esempio n. 15
0
def get_pdf(DOI, filename):
    """Load pdf for paper with DOI and save to file filename.

    :returns: False when no pdf URL is known, otherwise the result of the
        pdf sanity check.  Download errors are logged and re-raised.
    """
    url = get_pdf_url(DOI)
    if url is None:
        return False
    try:
        settings.print_message("Download pdf...", 2)
        utils.download_file(url, filename)
        return utils.check_pdf(filename)
    except BaseException:
        # Log the full traceback, then let the caller handle the failure.
        logger.warn(traceback.format_exc())
        raise
    # (unreachable "return True" after try/except removed)
Esempio n. 16
0
def delete(self, gr_id):
    """Remove the redis entry for group *gr_id* (no-op if absent)."""

    # Pseudo-key-id under which the group is stored
    __g_id = "g_" + str(gr_id)

    # Guard clause: nothing to do when the key is unknown
    if not self.rd_instance_us.keys(__g_id):
        return

    # Drop the hash from the db
    self.rd_instance_us.delete(__g_id)

    # Print alert
    if config.DEBUGGER:
        config.print_message("- Removed Group: %d" % int(gr_id))
Esempio n. 17
0
def save_fs(pr_info):
    """Mirror-clone (or pull) the project's git repository on local disk.

    :param pr_info: dict with at least "id", "name" and
        "http_url_to_repo".
    Side effects: creates config.COLLECTOR_GIT_FOLDER, shells out to git
    via the Python 2 'commands' module and temporarily changes cwd.
    """

    # Create folder to allocate all repositories if it does not exist
    if not os.path.exists(config.COLLECTOR_GIT_FOLDER):
        os.makedirs(config.COLLECTOR_GIT_FOLDER)

    # Save (temp) current directory
    cur_dir = os.getcwd()

    # Generate pseudo-name-id and get url
    __pr_id = str(pr_info.get("id")) + "_" + pr_info.get("name")
    __pr_url = pr_info.get("http_url_to_repo")

    # Insert credentials HTTP/S
    # NOTE(review): credentials are embedded in the clone URL, so they can
    # leak into process listings and the mirror's git config — confirm
    # this is acceptable for the deployment.
    __replace = "http://"
    if str(__pr_url).startswith("https://"):
        __replace = "https://"
    __pr_url = str(__pr_url).replace(
        __replace,
        __replace + config.GITLAB_USER + ":" + config.GITLAB_PASS + "@")

    # Change current directory to folder
    os.chdir(config.COLLECTOR_GIT_FOLDER)

    # Check repository does not exist
    if not os.path.exists(__pr_id):

        # Clone (mirror like bare repository); command built by string
        # concatenation — project names are assumed shell-safe.
        commands.getstatusoutput("git clone --mirror " + __pr_url + " " +
                                 __pr_id)

        # Print alert
        if config.DEBUGGER:
            config.print_message("- Cloned Project: " + pr_info.get("name"))

    # Repository exists
    else:

        # Change current directory to repository
        # NOTE(review): assumes COLLECTOR_GIT_FOLDER ends with a path
        # separator, since the id is concatenated directly — verify.
        os.chdir(config.COLLECTOR_GIT_FOLDER + __pr_id)

        # Clone (mirror like bare repository)
        commands.getstatusoutput("git pull " + __pr_url)

        # Print alert
        if config.DEBUGGER:
            config.print_message("- Pulled Project: " + pr_info.get("name"))

    # Revert current directory
    os.chdir(cur_dir)
def save_fs(pr_info):
    """Mirror-clone (or pull) the project's git repository on local disk.

    :param pr_info: dict with at least "id", "name" and
        "http_url_to_repo".
    Side effects: creates config.COLLECTOR_GIT_FOLDER, shells out to git
    via the Python 2 'commands' module and temporarily changes cwd.
    """

    # Create folder to allocate all repositories if it does not exist
    if not os.path.exists(config.COLLECTOR_GIT_FOLDER):
        os.makedirs(config.COLLECTOR_GIT_FOLDER)

    # Save (temp) current directory
    cur_dir = os.getcwd()

    # Generate pseudo-name-id and get url
    __pr_id = str(pr_info.get("id")) + "_" + pr_info.get("name")
    __pr_url = pr_info.get("http_url_to_repo")

    # Insert credentials HTTP/S
    # NOTE(review): credentials are embedded in the clone URL, so they can
    # leak into process listings and the mirror's git config — confirm
    # this is acceptable for the deployment.
    __replace = "http://"
    if str(__pr_url).startswith("https://"):
        __replace = "https://"
    __pr_url = str(__pr_url).replace(
        __replace, __replace + config.GITLAB_USER + ":" + config.GITLAB_PASS + "@"
        )

    # Change current directory to folder
    os.chdir(config.COLLECTOR_GIT_FOLDER)

    # Check repository does not exist
    if not os.path.exists(__pr_id):

        # Clone (mirror like bare repository); command built by string
        # concatenation — project names are assumed shell-safe.
        commands.getstatusoutput("git clone --mirror " + __pr_url + " " + __pr_id)

        # Print alert
        if config.DEBUGGER:
            config.print_message("- Cloned Project: " + pr_info.get("name"))

    # Repository exists
    else:

        # Change current directory to repository
        # NOTE(review): assumes COLLECTOR_GIT_FOLDER ends with a path
        # separator, since the id is concatenated directly — verify.
        os.chdir(config.COLLECTOR_GIT_FOLDER + __pr_id)

        # Clone (mirror like bare repository)
        commands.getstatusoutput("git pull " + __pr_url)

        # Print alert
        if config.DEBUGGER:
            config.print_message("- Pulled Project: " + pr_info.get("name"))

    # Revert current directory
    os.chdir(cur_dir)
Esempio n. 19
0
def get_request(url):
    """Send a GET request and return the response body.

    :returns: the raw response content, or None on a non-200 status or
        when the user declines to retry after a network error.
    """
    while True:
        try:
            resp = _SESSION.get(url)
            if resp.status_code != 200:
                settings.print_message("HTTP Error #{0}. {1}.".format(resp.status_code, resp.reason))
                return None
            return resp.content
        except Exception as error:
            logger.warn(traceback.format_exc())
            settings.print_message(error)
            # Interactive retry: anything other than 'y' gives up.
            if input("Try load again? [y/n]: ") == 'y':
                continue
            return None
    # (unreachable trailing "return None" and dead "resp = None" removed)
Esempio n. 20
0
def get_pdf(url, filename):
    """Download the pdf at *url* into *filename*.

    :returns: None when url is missing, otherwise the result of the pdf
        sanity check.  Errors are logged and re-raised (Ctrl-C included).
    """
    settings.print_message("PDF-file found in google scholar.", 2)
    if url is None:
        return None
    try:
        settings.print_message("Download pdf...", 2)
        utils.download_file(url, filename)
        return utils.check_pdf(filename)
    except KeyboardInterrupt:
        # Never swallow a user interrupt.
        raise
    except BaseException:
        logger.warn(traceback.format_exc())
        raise
    # (unreachable "return 0" after try/except removed)
Esempio n. 21
0
def get_pdf(QUESTION, filename):
    """Load pdf for paper with QUESTION and save to file filename.

    :returns: None when QUESTION is empty or no URL is found, otherwise
        the result of the pdf sanity check.  Errors are logged and
        re-raised (Ctrl-C included).
    """
    if not QUESTION:
        return None
    url = get_pdf_url(QUESTION)
    if url is None:
        return None
    try:
        settings.print_message(
            "Download pdf from Sci-Hub by '{}'".format(QUESTION), 2)
        utils.download_file(url, filename)
        return utils.check_pdf(filename)
    except KeyboardInterrupt:
        # Never swallow a user interrupt.
        raise
    except BaseException:
        logger.warn(traceback.format_exc())
        raise
    # (unreachable "return 0" after try/except removed)
Esempio n. 22
0
def delete(self, pr_id):
    """Soft-delete project *pr_id*: archive its files and flag it in redis."""

    # Pseudo-key-id for the project hash
    __p_id = "p_" + str(pr_id)

    # Guard clause: nothing to do when the project is unknown
    if not self.rd_instance_pr.keys(__p_id):
        return

    # Fetch the stored metadata before touching anything
    __pr_info = self.rd_instance_pr.hgetall(__p_id)

    # Move folder to deleted folder
    delete_fs(__pr_info)

    # Mark the record as deleted instead of dropping it
    self.rd_instance_pr.hset(__p_id, "state", "deleted")

    # Print alert
    if config.DEBUGGER:
        config.print_message("- Removed Project %d " % int(pr_id))
def delete(self, pr_id):
    """Flag project *pr_id* as deleted in redis after archiving its files."""

    # Key under which the project hash lives
    __p_id = "p_" + str(pr_id)

    # Bail out early when the project does not exist in the db
    if not self.rd_instance_pr.keys(__p_id):
        return

    # Grab the current metadata for the filesystem step
    __pr_info = self.rd_instance_pr.hgetall(__p_id)

    # Move folder to deleted folder
    delete_fs(__pr_info)

    # Soft delete: only the state field changes
    self.rd_instance_pr.hset(__p_id, "state", "deleted")

    # Print alert
    if config.DEBUGGER:
        config.print_message("- Removed Project %d " % int(pr_id))
Esempio n. 24
0
def processReferencesDocument(pdf_file_name):
    """Extract the reference list from an article PDF via grobid.

    :param pdf_file_name: path of the PDF to process.
    :returns: list of reference dicts, or None when grobid returns no
        data or no references are present.
    """
    settings.print_message("Send to grobid service..", 2)
    # "with" closes the PDF handle deterministically — the original
    # passed an open() result that was never closed.
    with open(pdf_file_name, 'rb') as pdf_file:
        data = get_data_from_grobid(GROBID_PROCESSED_REFERENCES_COMMAND,
                                    pdf_file)
    settings.print_message("Check data", 2)
    logger.debug("Check data")
    if not data:
        logger.debug(
            "Server returned empty response (File processing failed), skip.")
        return None
    settings.print_message("Processing TEI data", 2)
    logger.debug("Convert tei to dictionary")
    dictData = tei2dict.tei_to_dict(data)
    logger.debug("Convert completed: {}".format(json.dumps(dictData)))
    if not dictData["references"]:
        logger.debug("References are not available, skip")
        return None
    return dictData["references"]
Esempio n. 25
0
def processHeaderDocument(pdf_file_name):
    """Extract header metadata (title, authors, DOI, ...) from a PDF via grobid.

    :param pdf_file_name: path of the PDF to process.
    :returns: dict of parsed header fields (with "abstract_ru" set to
        None), or None when grobid returns no data.
    """
    settings.print_message("Send to grobid service.", 2)
    # "with" closes the PDF handle deterministically — the original
    # passed an open() result that was never closed.
    with open(pdf_file_name, 'rb') as pdf_file:
        data = get_data_from_grobid(GROBID_PROCESSED_HEADER_COMMAND,
                                    pdf_file)
    settings.print_message("Check data.", 2)
    logger.debug("Check data.")
    if not data:
        logger.debug(
            "Server returned empty response (File processing failed), skip.")
        return None
    settings.print_message("Processing TEI data.", 2)
    logger.debug("Convert tei to dictionary.")
    dictData = tei2dict.tei_to_dict(data)
    logger.debug("Convert completed: {}".format(json.dumps(dictData)))
    # Only the size of the author collection is used below, so the
    # set-vs-list asymmetry is harmless.
    authors = set(dictData["authors"]) if dictData["authors"] else []
    msg = "RESULT: has title:{:^3}has date:{:^3}has DOI:{:^3}has abstract:{:^3}authors:{:^4}has start page:{:^3}has end page:{:^3}has publisher:{:^3}".format(
        dictData["title"] is not None, dictData["pubdate"] is not None,
        dictData["DOI"] is not None, dictData["abstract"] is not None,
        len(authors), dictData["start_page"] is not None, dictData["end_page"]
        is not None, dictData["publisher"] is not None)
    dictData["abstract_ru"] = None
    logger.debug(msg)
    return dictData
def save_code(self, pr_id, pr_name):
    """Synchronise a project's branches (and their commits) with redis.

    Branch keys are "<p_id>:<branch-id>".  Branches present only in redis
    are purged (together with orphaned commits); every branch seen at
    gitlab is (re)written and its commits refreshed via util_commit.

    :param pr_id: project identifier (coercible to int/str).
    :param pr_name: project name, forwarded to util_commit.update.
    """

    # Generate pseudo-key-id
    __p_id = "p_" + str(pr_id)

    # Generate metadata from gitlab (list comprehension used purely for
    # its side effect of filling the dict, keyed by branch name)
    __branches_gl_info = {}
    __branches = self.gl_instance.get_projects_repository_branches_byId(id=pr_id)
    [__branches_gl_info.update({
        x.get("name"): x
    })for x in __branches]

    # Generate metadata from redis; the branch-name segment of the key is
    # base16-encoded, so decode it to compare against gitlab names
    __branches_rd_info = {}
    __branches = self.rd_instance_br.keys(__p_id + ":*")
    [__branches_rd_info.update({
        base64.b16decode(x.split(":")[1]): self.rd_instance_br.hgetall(x)
    }) for x in __branches]

    # Generate difference and intersection metadata
    __mt_diff = set(__branches_gl_info.keys()).difference(set(__branches_rd_info.keys()))
    __mt_int = set(__branches_gl_info.keys()).intersection(set(__branches_rd_info.keys()))
    __mt_mod = list(__mt_diff.union(__mt_int))
    __mt_del = list(set(__branches_rd_info.keys()).difference(set(__branches_gl_info.keys())))

    # Structure for removed commits
    __mt_del_commits = set()

    # Delete information about Branch
    count = 0
    for i in __mt_del:

        # Number of deleted branches
        count += 1

        # Print alert
        if config.DEBUGGER:
            config.print_message(
                "* (%d) [%d/%d] Deleted %s" %(int(pr_id), count, len(__mt_del), i)
            )

        # Get information from redis
        __br_info = __branches_rd_info[i]

        # Generate pseudo-key-id and remove info
        __br_id = __p_id + ":" + __br_info.get("id")
        self.rd_instance_br.delete(__br_id)
        self.rd_instance_br_co.delete(__br_id)

        # Remove links with contributors.
        # NOTE(review): eval() deserialises the stored contributor list —
        # dangerous if the redis value can ever be tampered with; an
        # explicit parser (e.g. ast.literal_eval) would be safer. Verify.
        __br_con = eval(__br_info.get("contributors"))
        for j in __br_con:
            __us_com = self.rd_instance_us_co.smembers(j)
            for x in __us_com:
                if str(x).startswith(__br_id):
                    # Remember "<p_id>:<commit>" so the commit itself can
                    # be purged below if no other branch references it
                    __mt_del_commits.add(str(x).split(":")[0] + ":" + str(x).split(":")[2])
                    self.rd_instance_us_co.srem(j, x)

    # Remove all unique commits, i.e. commits that no surviving branch of
    # this project still references
    if len(__mt_del_commits) > 0:
        __rd_branch_co = set()
        __rd_branch = self.rd_instance_br.keys(__p_id + "*")
        for i in __rd_branch:
            __rd_branch_co = __rd_branch_co.union(
                set(dict(self.rd_instance_br_co.zrange(i, 0, -1)).keys())
            )
        for i in __mt_del_commits:
            if i not in __rd_branch_co:
                self.rd_instance_co.delete(i)

    # Update information about Branch
    count = 0
    for i in __mt_mod:

        # Number of reviewed branches
        count += 1

        # Print alert
        if config.DEBUGGER:
            config.print_message(
                "* (%d) [%d/%d] Reviewed %s" %(int(pr_id), count, len(__mt_mod), i)
            )

        # Clean information
        __br_info = __branches_gl_info[i]
        st_clean.branch(__br_info)

        # Generate pseudo-key-id
        __br_id = __p_id + ":" + __br_info.get("id")

        # Save / Replace information at redis
        self.rd_instance_br.hmset(__br_id, __br_info)

        # Update information about branch's commits
        util_commit.update(self, pr_id, pr_name, i)
Esempio n. 27
0
def get_friends_graph():
    """ Get friends for users and create social graph. """
    total_users = 0
    count_bad_users = 0
    count_graph_users = 0

    save_counter = settings.SAVE_COUNT

    result = "successful"

    settings.print_message(
        "Create VK session for application (app_id={})".format(
            settings.VK_APPLICATION_ID))
    logger.debug("Create VK session for application (app_id={}).".format(
        settings.VK_APPLICATION_ID))
    try:
        session = vk.Session(access_token=settings.VK_ACCESS_TOKEN)
        api = vk.API(session)
    except Exception as error:
        logger.warn(traceback.format_exc())
        settings.print_message(
            "Can't create VK session for application with app_id={}".format(
                settings.VK_APPLICATION_ID))
        return ("with error.", 0, 0, 0)
    # logger.debug("".format())
    graph = yEdGraph.Graph()
    level_queue = [
        int(settings.PARAMS["user_id"]),
    ]
    count_levels = int(settings.PARAMS["levels"])
    all_level_ids = [list() for _ in range(count_levels)]

    for step in range(count_levels):
        level_counter = len(level_queue)
        settings.print_message("Process level #{} (total users {})".format(
            step, level_counter))
        logger.debug("Process level #{} (total users {})".format(
            step, level_counter))

        for user_index in range(level_counter):
            id = level_queue[user_index]
            total_users += 1
            save_counter -= 1
            if save_counter <= 0:
                save_counter = settings.SAVE_COUNT
                logger.debug("Create graphml for graph.")
                graph.construct_graphml()
                logger.debug("Save graphml in backup file backup_{}.".format(
                    settings.OUTPUT_FILE))
                try:
                    with open("backup_{}".format(settings.OUTPUT_FILE),
                              "w",
                              encoding=settings.OUTPUT_ENCODING) as f:
                        f.write(graph.get_graph())
                except Exception as error:
                    logger.warn(traceback.format_exc())
                    logger.warn("Can not create backup file.")
            settings.print_message(
                "Process id {}. User #{} on level #{} (total {})".format(
                    id, user_index, step, level_counter), 2)
            logger.debug(
                "Process id {}. User #{} on level #{} (total {}).".format(
                    id, user_index, step, level_counter))
            try:
                settings.print_message("Add user node in graph.", 3)
                logger.debug("Check user (id={}) in graph".format(id))
                if not id in graph.nodes.keys():
                    logger.debug(
                        "Create user node in graph (id={}).".format(id))
                    logger.debug("Get info for user (id={}).".format(id))
                    try:
                        user_info = api.users.get(
                            user_ids=[id],
                            fields=
                            "nickname, sex, bdate, city, country, photo_200_orig, photo_200, photo_100"
                        )
                        if not user_info:
                            raise Exception("User info is empty.")
                    except Exception as error:
                        logger.warn(traceback.format_exc())
                        logger.debug(
                            "Can not get info for user (id={}), skip.".format(
                                id))
                        settings.print_message(
                            "Can not get info for user, skip.", 3)
                        count_bad_users += 1
                        continue
                    user_info = user_info[0]
                    logger.debug("User info='{}'.".format(
                        json.dumps(user_info)))
                    logger.debug("Load user photo (id={}).".format(id))
                    photo = utils.get_request(
                        user_info[settings.VK_PHOTO_1 if settings.VK_PHOTO_1 in
                                  user_info else settings.VK_PHOTO_2])
                    if not photo:
                        logger.debug(
                            "Can't loading user photo (id={}).".format(id))
                    info_label = "ФИГ: {} {} \nНик: {} \nID: {} \nПол: {} \nДата рождения: {} \nГород: {} \nСтрана: {}".format(
                        user_info["first_name"] if "first_name" in user_info
                        else "----", user_info["last_name"] if "last_name"
                        in user_info else "----", user_info["nickname"]
                        if "nickname" in user_info else "----", id,
                        utils.SEX[user_info["sex"]]
                        if "sex" in user_info else "----",
                        user_info["bdate"] if "bdate" in user_info else "----",
                        user_info["city"] if "city" in user_info else "----",
                        user_info["country"]
                        if "country" in user_info else "----")
                    graph.add_node(id,
                                   check_existance=False,
                                   label=info_label,
                                   shape="roundrectangle",
                                   font_style="italic",
                                   underlined_text="false",
                                   img=photo,
                                   width="200",
                                   height="200",
                                   border_has_color="false")
                    count_graph_users += 1
                else:
                    logger.debug(
                        "Graph contains user node (id={}).".format(id))
                    settings.print_message(
                        "Graph already contains this user node.", 3)
                settings.print_message("Get friendlist.", 3)
                logger.debug("Get friends for user (id={}).".format(id))
                try:
                    friends = api.friends.get(
                        user_id=id,
                        count=1000000,
                        fields=
                        "nickname, sex, bdate, city, country, photo_200_orig, photo_200, photo_100"
                    )
                    if not friends: raise Exception("User info is empty.")
                except Exception as error:
                    logger.warn(traceback.format_exc())
                    logger.debug("Can not get friends, skip.")
                    settings.print_message("Can not get friendlist, skip.", 3)
                    count_bad_users += 1
                    continue
                settings.print_message(
                    "Process friends (total {}, level #{}).".format(
                        len(friends), step + 1), 3)
                logger.debug("Friends count: {}".format(len(friends)))
                _ = [
                    level_queue.append(friend["user_id"])
                    for i, friend in enumerate(friends)
                    if not friend["user_id"] in graph.nodes
                    and i < settings.PARAMS["max_processing_friends"]
                ]
                logger.debug("Add node for each friend and create edges.")
                for friend_index, friend in enumerate(friends):
                    total_users += 1
                    logger.debug("Process friend #{} (id={}).".format(
                        friend_index, friend["user_id"]))
                    settings.print_message(
                        "Process friends #{} id={} (total {}, level #{}).".
                        format(friend_index, friend["user_id"], len(friends),
                               step + 1), 4)
                    if friend_index > settings.PARAMS["max_processing_friends"]:
                        break
                    settings.print_message("Add user node in graph.", 5)
                    logger.debug("Check user (id={}) in graph".format(
                        friend["user_id"]))
                    if not friend["user_id"] in graph.nodes.keys():
                        logger.debug(
                            "Create user node in graph (id={}).".format(
                                friend["user_id"]))
                        logger.debug("User info='{}'.".format(
                            json.dumps(friend)))
                        logger.debug("Load user photo (id={}).".format(
                            friend["user_id"]))
                        photo = utils.get_request(
                            friend[settings.VK_PHOTO_1 if settings.
                                   VK_PHOTO_1 in
                                   friend else settings.VK_PHOTO_2])
                        if not photo:
                            logger.debug(
                                "Can't loading user photo (id={}).".format(
                                    friend["user_id"]))
                        info_label = "ФИГ: {} {} \nНик: {} \nID: {} \nПол: {} \nДата рождения: {} \nГород: {} \nСтрана: {}".format(
                            friend["first_name"] if "first_name" in friend else
                            "----", friend["last_name"] if "last_name"
                            in friend else "----", friend["nickname"]
                            if "nickname" in friend else "----",
                            friend["user_id"], utils.SEX[friend["sex"]]
                            if "sex" in friend else "----",
                            friend["bdate"] if "bdate" in friend else "----",
                            friend["city"] if "city" in friend else "----",
                            friend["country"]
                            if "country" in friend else "----")
                        graph.add_node(friend["user_id"],
                                       check_existance=False,
                                       label=info_label,
                                       shape="roundrectangle",
                                       font_style="italic",
                                       underlined_text="false",
                                       img=photo,
                                       width="200",
                                       height="200",
                                       border_has_color="false")
                        count_graph_users += 1
                    else:
                        logger.debug(
                            "Graph contains user node (id={}).".format(
                                friend["user_id"]))
                        settings.print_message(
                            "Graph already contains this user node.", 5)
                    logger.debug("Add edge {}-{} in graph.".format(
                        friend["user_id"], id))
                    # if ...
                    graph.add_edge(id,
                                   friend["user_id"],
                                   width="1.0",
                                   color="#000000",
                                   check_existance_nodes=False)
            except Exception as error:
                logger.warn(traceback.format_exc())
                result = "with error"
        level_queue = level_queue[level_counter:]
    logger.debug("Recovering the last level link.")
    settings.print_message("Recovering the last level link.")
    for user_index, user_id in enumerate(level_queue):
        settings.print_message(
            "Process id {}. User #{} on last level (total {})".format(
                user_id, user_index, len(level_queue)), 2)
        settings.print_message("Get friendlist.", 2)
        logger.debug("Get friends for user (id={}).".format(user_id))
        loop_counter = settings.MAX_RETRY
        friends = None
        while (loop_counter > 0):
            try:
                loop_counter -= 1
                friends = api.friends.get(user_id=user_id, count=1000000)
                time.sleep(0.3)
                if not friends:
                    raise utils.EmptyDataException("User info is empty.")
                break
            except vk.exceptions.VkAPIError as error:
                logger.warn(traceback.format_exc())
                if error.code == 6:
                    time.sleep(0.4)
                    continue
                else:
                    break
            except utils.EmptyDataException as error:
                logger.warn(traceback.format_exc())
                break
            except Exception as error:
                logger.warn(traceback.format_exc())
                count_bad_users += 1
                loop_counter = settings.MAX_RETRY
                settings.print_message("Can not get friendlist, skip?", 2)
                if input("[y/n]: ") == 'n': continue
                logger.debug("Can not get friends, skip.")
                break
        if not friends: continue
        settings.print_message(
            "Process friends (total {}).".format(len(friends)), 3)
        logger.debug("Friends count: {}".format(len(friends)))
        for friend_index, friend_id in enumerate(friends):
            if friend_id in graph.nodes.keys():
                logger.debug("Add edge {}-{} in graph.".format(
                    friend_id, user_id))
                graph.add_edge(user_id,
                               friend_id,
                               width="1.0",
                               color="#000000",
                               check_existance_nodes=False)
    logger.debug("Create graphml for graph.")
    graph.construct_graphml()
    logger.debug("Save graphml in file {}.".format(settings.OUTPUT_FILE))
    try:
        with open(settings.OUTPUT_FILE, "w",
                  encoding=settings.OUTPUT_ENCODING) as f:
            f.write(graph.get_graph())
        if os.path.exists("backup_{}".format(settings.OUTPUT_FILE)):
            os.remove("backup_{}".format(settings.OUTPUT_FILE))
    except Exception as error:
        logger.warn(traceback.format_exc())
        result = "with error"
    return (result, total_users, count_graph_users, count_bad_users)
Esempio n. 28
0
def get_friends_of_users(uids):
    """Build a social graph from the friend lists of the given VK users.

    For every uid in *uids* the function adds a node carrying the user's
    profile data and photo, fetches the user's friend list through the VK
    API and adds a node plus an edge for each friend.  A graphml backup is
    written every ``settings.SAVE_COUNT`` processed users, and the final
    graph is written to ``settings.OUTPUT_FILE`` (the backup is removed
    after a successful final save).

    Returns a 4-tuple ``(result, total_users, count_graph_users,
    count_bad_users)`` where *result* is a human-readable status string.
    """
    total_users = 0
    count_bad_users = 0
    count_graph_users = 0

    # Countdown until the next periodic graphml backup.
    save_counter = settings.SAVE_COUNT

    result = "successful"

    settings.print_message(
        "Create VK session for application (app_id={})".format(
            settings.VK_APPLICATION_ID))
    logger.debug("Create VK session for application (app_id={}).".format(
        settings.VK_APPLICATION_ID))
    try:
        session = vk.Session(access_token=settings.VK_ACCESS_TOKEN)
        api = vk.API(session)
    except Exception as error:
        logger.warn(traceback.format_exc())
        settings.print_message(
            "Can't create VK session for application with app_id={}".format(
                settings.VK_APPLICATION_ID))
        # Without a VK session nothing can be fetched; report and bail out.
        return ("with error.", 0, 0, 0)
    graph = yEdGraph.Graph()

    # NOTE(review): the loop variable `id` shadows the builtin of the same
    # name for the rest of the loop body.
    for user_index, id in enumerate(uids):
        total_users += 1
        save_counter -= 1
        # Periodically dump the graph to a backup file so a crash does not
        # lose all progress.
        if save_counter <= 0:
            save_counter = settings.SAVE_COUNT
            logger.debug("Create graphml for graph.")
            graph.construct_graphml()
            logger.debug("Save graphml in backup file backup_{}.".format(
                settings.OUTPUT_FILE))
            try:
                with open("backup_{}".format(settings.OUTPUT_FILE),
                          "w",
                          encoding=settings.OUTPUT_ENCODING) as f:
                    f.write(graph.get_graph())
            except Exception as error:
                # A failed backup is non-fatal; processing continues.
                logger.warn(traceback.format_exc())
                logger.warn("Can not create backup file.")
        settings.print_message(
            "Process id {}. User #{} (total {})".format(
                id, user_index, len(uids)), 2)
        logger.debug("Process id {}. User #{} (total {})".format(
            id, user_index, len(uids)))
        try:
            settings.print_message("Add user node in graph.", 3)
            logger.debug("Check user (id={}) in graph".format(id))
            if not id in graph.nodes.keys():
                logger.debug("Create user node in graph (id={}).".format(id))
                logger.debug("Get info for user (id={}).".format(id))
                try:
                    user_info = api.users.get(
                        user_ids=[id],
                        fields=
                        "nickname, sex, bdate, city, country, photo_200_orig, photo_200, photo_100"
                    )
                    if not user_info: raise Exception("User info is empty.")
                except Exception as error:
                    logger.warn(traceback.format_exc())
                    logger.debug(
                        "Can not get info for user (id={}), skip.".format(id))
                    settings.print_message("Can not get info for user, skip.",
                                           3)
                    count_bad_users += 1
                    continue
                user_info = user_info[0]
                # Re-bind to the canonical uid reported by the API response.
                id = user_info["uid"]
                logger.debug("User info='{}'.".format(json.dumps(user_info)))
                logger.debug("Load user photo (id={}).".format(id))
                # Prefer the photo field named by VK_PHOTO_1; fall back to
                # VK_PHOTO_2 when the first is absent from the profile.
                photo = utils.get_request(
                    user_info[settings.VK_PHOTO_1 if settings.VK_PHOTO_1 in
                              user_info else settings.VK_PHOTO_2])
                if not photo:
                    logger.debug(
                        "Can't loading user photo (id={}).".format(id))
                # Node label with the user's profile fields (Russian UI
                # text); missing fields are rendered as "----".
                info_label = "ФИГ: {} {} \nНик: {} \nID: {} \nПол: {} \nДата рождения: {} \nГород: {} \nСтрана: {}".format(
                    user_info["first_name"] if "first_name" in user_info else
                    "----", user_info["last_name"] if "last_name" in user_info
                    else "----", user_info["nickname"] if "nickname"
                    in user_info else "----", id, utils.SEX[user_info["sex"]]
                    if "sex" in user_info else "----",
                    user_info["bdate"] if "bdate" in user_info else "----",
                    user_info["city"] if "city" in user_info else "----",
                    user_info["country"] if "country" in user_info else "----")
                graph.add_node(id,
                               check_existance=False,
                               label=info_label,
                               shape="roundrectangle",
                               font_style="italic",
                               underlined_text="false",
                               img=photo,
                               width="200",
                               height="200",
                               border_has_color="false")
                count_graph_users += 1
            else:
                logger.debug("Graph contains user node (id={}).".format(id))
                settings.print_message(
                    "Graph already contains this user node.", 3)
            settings.print_message("Get friendlist.", 3)
            logger.debug("Get friends for user (id={}).".format(id))
            try:
                friends = api.friends.get(
                    user_id=id,
                    count=1000000,
                    fields=
                    "nickname, sex, bdate, city, country, photo_200_orig, photo_200, photo_100"
                )
                if not friends: raise Exception("User info is empty.")
            except Exception as error:
                logger.warn(traceback.format_exc())
                logger.debug("Can not get friends, skip.")
                settings.print_message("Can not get friendlist, skip.", 3)
                count_bad_users += 1
                continue
            settings.print_message(
                "Process friends (total {}).".format(len(friends)), 3)
            logger.debug("Friends count: {}".format(len(friends)))
            logger.debug("Add node for each friend and create edges.")
            for friend_index, friend in enumerate(friends):
                total_users += 1
                logger.debug("Process friend #{} (id={}).".format(
                    friend_index, friend["user_id"]))
                settings.print_message(
                    "Process friends #{} id={} (total {}).".format(
                        friend_index, friend["user_id"], len(friends)), 4)
                # NOTE(review): `>` lets index max_processing_friends itself
                # through, i.e. max+1 friends are processed — confirm intent
                # (`>=` would cap at exactly max).
                if friend_index > settings.PARAMS["max_processing_friends"]:
                    break
                settings.print_message("Add user node in graph.", 5)
                logger.debug("Check user (id={}) in graph".format(
                    friend["user_id"]))
                if not friend["user_id"] in graph.nodes.keys():
                    logger.debug("Create user node in graph (id={}).".format(
                        friend["user_id"]))
                    logger.debug("User info='{}'.".format(json.dumps(friend)))
                    logger.debug("Load user photo (id={}).".format(
                        friend["user_id"]))
                    photo = utils.get_request(
                        friend[settings.VK_PHOTO_1 if settings.VK_PHOTO_1 in
                               friend else settings.VK_PHOTO_2])
                    if not photo:
                        logger.debug(
                            "Can't loading user photo (id={}).".format(
                                friend["user_id"]))
                    # Same label layout as the root user's node above.
                    info_label = "ФИГ: {} {} \nНик: {} \nID: {} \nПол: {} \nДата рождения: {} \nГород: {} \nСтрана: {}".format(
                        friend["first_name"] if "first_name" in friend else
                        "----", friend["last_name"]
                        if "last_name" in friend else "----",
                        friend["nickname"] if "nickname" in friend else "----",
                        friend["user_id"], utils.SEX[friend["sex"]]
                        if "sex" in friend else "----",
                        friend["bdate"] if "bdate" in friend else "----",
                        friend["city"] if "city" in friend else "----",
                        friend["country"] if "country" in friend else "----")
                    graph.add_node(friend["user_id"],
                                   check_existance=False,
                                   label=info_label,
                                   shape="roundrectangle",
                                   font_style="italic",
                                   underlined_text="false",
                                   img=photo,
                                   width="200",
                                   height="200",
                                   border_has_color="false")
                    count_graph_users += 1
                else:
                    logger.debug("Graph contains user node (id={}).".format(
                        friend["user_id"]))
                    settings.print_message(
                        "Graph already contains this user node.", 5)
                logger.debug("Add edge {}-{} in graph.".format(
                    friend["user_id"], id))
                # if ...
                graph.add_edge(id,
                               friend["user_id"],
                               width="1.0",
                               color="#000000",
                               check_existance_nodes=False)
        except Exception as error:
            # Any unexpected per-user failure is logged and the overall run
            # is marked as finished "with error", but processing continues.
            logger.warn(traceback.format_exc())
            result = "with error"
    logger.debug("Create graphml for graph.")
    graph.construct_graphml()
    logger.debug("Save graphml in file {}.".format(settings.OUTPUT_FILE))
    try:
        with open(settings.OUTPUT_FILE, "w",
                  encoding=settings.OUTPUT_ENCODING) as f:
            f.write(graph.get_graph())
        # The final save succeeded, so the intermediate backup is obsolete.
        if os.path.exists("backup_{}".format(settings.OUTPUT_FILE)):
            os.remove("backup_{}".format(settings.OUTPUT_FILE))
    except Exception as error:
        logger.warn(traceback.format_exc())
        result = "with error"
    return (result, total_users, count_graph_users, count_bad_users)
Esempio n. 29
0
def save_code(self, pr_id, pr_name):
    """Synchronise the branches (and their commits) of one gitlab project
    with redis.

    Branches present in gitlab are created/refreshed in redis, branches
    that disappeared from gitlab are deleted together with their commit
    links, and commits no longer referenced by any surviving branch are
    purged.

    :param pr_id: gitlab project id
    :param pr_name: gitlab project name (forwarded to the commit updater)
    """

    # Generate pseudo-key-id
    __p_id = "p_" + str(pr_id)

    # Generate metadata from gitlab: branch name -> branch info dict
    __branches_gl_info = {}
    __branches = self.gl_instance.get_projects_repository_branches_byId(
        id=pr_id)
    [__branches_gl_info.update({x.get("name"): x}) for x in __branches]

    # Generate metadata from redis: decoded branch name -> stored hash.
    # Keys look like "p_<id>:<base16-encoded branch name>".
    # NOTE(review): under Python 3 b16decode returns bytes, so these keys
    # would never equal the str names coming from gitlab; this code
    # appears to target Python 2 — confirm.
    __branches_rd_info = {}
    __branches = self.rd_instance_br.keys(__p_id + ":*")
    [
        __branches_rd_info.update({
            base64.b16decode(x.split(":")[1]):
            self.rd_instance_br.hgetall(x)
        }) for x in __branches
    ]

    # Generate difference and intersection metadata:
    #   __mt_mod — branches to create or refresh (gitlab data wins)
    #   __mt_del — branches that only exist in redis, to be removed
    __mt_diff = set(__branches_gl_info.keys()).difference(
        set(__branches_rd_info.keys()))
    __mt_int = set(__branches_gl_info.keys()).intersection(
        set(__branches_rd_info.keys()))
    __mt_mod = list(__mt_diff.union(__mt_int))
    __mt_del = list(
        set(__branches_rd_info.keys()).difference(
            set(__branches_gl_info.keys())))

    # Structure for removed commits
    __mt_del_commits = set()

    # Delete information about Branch
    count = 0
    for i in __mt_del:

        # Number of deleted branches
        count += 1

        # Print alert
        if config.DEBUGGER:
            config.print_message("* (%d) [%d/%d] Deleted %s" %
                                 (int(pr_id), count, len(__mt_del), i))

        # Get information from redis
        __br_info = __branches_rd_info[i]

        # Generate pseudo-key-id and remove info
        __br_id = __p_id + ":" + __br_info.get("id")
        self.rd_instance_br.delete(__br_id)
        self.rd_instance_br_co.delete(__br_id)

        # Remove links with contributors.
        # NOTE(review): eval() on data read back from redis — safe only if
        # the "contributors" field is always written by this application;
        # ast.literal_eval would be the safer choice.
        __br_con = eval(__br_info.get("contributors"))
        for j in __br_con:
            __us_com = self.rd_instance_us_co.smembers(j)
            for x in __us_com:
                # Members look like "<project>:<branch>:<commit>"; keep the
                # project and commit parts to identify candidate orphans.
                if str(x).startswith(__br_id):
                    __mt_del_commits.add(
                        str(x).split(":")[0] + ":" + str(x).split(":")[2])
                    self.rd_instance_us_co.srem(j, x)

    # Remove all unique commits
    # (only commits referenced by no surviving branch are deleted)
    if len(__mt_del_commits) > 0:
        __rd_branch_co = set()
        __rd_branch = self.rd_instance_br.keys(__p_id + "*")
        for i in __rd_branch:
            # Collect every commit id still referenced by any branch of
            # this project.
            __rd_branch_co = __rd_branch_co.union(
                set(dict(self.rd_instance_br_co.zrange(i, 0, -1)).keys()))
        for i in __mt_del_commits:
            if i not in __rd_branch_co:
                self.rd_instance_co.delete(i)

    # Update information about Branch
    count = 0
    for i in __mt_mod:

        # Number of reviewed branches
        count += 1

        # Print alert
        if config.DEBUGGER:
            config.print_message("* (%d) [%d/%d] Reviewed %s" %
                                 (int(pr_id), count, len(__mt_mod), i))

        # Clean information
        __br_info = __branches_gl_info[i]
        st_clean.branch(__br_info)

        # Generate pseudo-key-id
        __br_id = __p_id + ":" + __br_info.get("id")

        # Save / Replace information at redis
        self.rd_instance_br.hmset(__br_id, __br_info)

        # Update information about branch's commits
        util_commit.update(self, pr_id, pr_name, i)
Esempio n. 30
0
def dispatch(command):
    """Execute the named command and report timing/summary information.

    Args:
        command: command name; currently only "getFriendsGraph" is known.
            Unknown commands are logged and reported, not raised.
    """
    result = None
    logger.debug("command %s.", command)
    start_time = datetime.now()
    try:
        for case in utils.Switch(command):
            if case("getFriendsGraph"):
                logger.debug("Processing command '%s'." % command)
                settings.print_message("Processing command '%s'." % command)
                # START COMMAND
                result = get_friends_graph()
                logger.debug(
                    "Processing %s. Total users: %i. Users in graph: %i Bad requests: %i."
                    % result)
                settings.print_message(
                    "Processing %s. Total users: %i. Users in graph: %i Bad requests: %i."
                    % result)
                break
            if case():  # default
                # logger.warn is a deprecated alias; use warning().
                logger.warning("Unknown command: %s" % command)
                settings.print_message("Unknown command: %s" % command)
                break
    except KeyboardInterrupt:
        settings.print_message(
            "Caught KeyboardInterrupt, terminating processing")
    except Exception:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # GeneratorExit; narrow to Exception while keeping the logging.
        logger.error(traceback.format_exc())
        settings.print_message("Processing finished with error.")
        settings.print_message("For more details, see the log.")
    # Timing summary is emitted regardless of success or failure.
    end_time = datetime.now()
    settings.print_message("Run began on {0}".format(start_time))
    settings.print_message("Run ended on {0}".format(end_time))
    settings.print_message("Elapsed time was: {0}".format(end_time -
                                                          start_time))
    logger.debug("Run began on {0}".format(start_time))
    logger.debug("Run ended on {0}".format(end_time))
    logger.debug("Elapsed time was: {0}".format(end_time - start_time))
Esempio n. 31
0
def _cluster_handler(cluster_id, papers_count):
    """Collect distinct paper versions from a Google Scholar cluster.

    Walks every result page of the cluster, downloads the EndNote file for
    each paper and merges near-duplicate records (same title, and same
    year/author count/type/pages where present), keeping the record with
    the most fields.

    Args:
        cluster_id: Google Scholar cluster identifier.
        papers_count: expected number of papers (used for logging only).

    Returns:
        Tuple of merged EndNote dictionaries, or None if a cluster page
        could not be loaded.

    Raises:
        Exception: when a paper block has no EndNote link (usually means
            the Scholar display language is not set to English).
    """
    logger.debug("Handle %i papers from cluster %s." %
                 (papers_count, cluster_id))
    url = _FULLURL.format(_HOST, _SCHOLARCLUSTER.format(cluster_id))
    logger.debug("Get cluster page URL='{0}'.".format(url))
    soup = utils.get_soup(url)
    # This list contains info about unique papers
    EndNote_list = list()
    file_counter = 0
    merged_counter = 0

    def is_EndNote_equal(EndNote_1, EndNote_2):
        """Return True if the two EndNote records describe the same paper.

        Year and pages only participate in the comparison when both
        records carry them; callers guarantee "author" is present.
        """
        # FIX: the original compared len(EndNote_1["author"]) with itself
        # (always True) — the author counts of BOTH records must match.
        return (
            EndNote_1["title"].lower() == EndNote_2["title"].lower()
            and ("year" not in EndNote_1 or "year" not in EndNote_2
                 or EndNote_1["year"] == EndNote_2["year"])
            and len(EndNote_1["author"]) == len(EndNote_2["author"])
            and EndNote_1["type"] == EndNote_2["type"]
            and ("pages" not in EndNote_1 or "pages" not in EndNote_2
                 or EndNote_1["pages"] == EndNote_2["pages"])
        )

    def intersect_papers(EndNote_data, EndNote_list):
        """Return the (possibly empty) list of records similar to EndNote_data."""
        return [i for i in EndNote_list if is_EndNote_equal(EndNote_data, i)]

    # Loop on pages
    while True:
        if soup is None:
            logger.debug(
                "Soup for cluster page URL='{0}' is None.".format(url))
            return None
        # For each paper block collect the EndNote link and, when present,
        # the cited-by count ("Cited"/"Цитируется" link text).
        logger.debug("Find EndNote links for each paper in cluster.")
        footer_links = [{
            "EndNote" if "EndNote" in link.text else "citedby":
            link["href"].strip() if "EndNote" in link.text else int(
                re.findall(r'\d+', link.text)[0])
            for link in paper_block.find("div", class_="gs_fl").find_all('a')
            if "EndNote" in link.text or "Cited" in link.text
            or "Цитируется" in link.text
        } for paper_block in soup.find_all('div', class_='gs_ri')]
        logger.debug(
            "Extract unique papers in cluster and load data from EndNote.")
        for links in footer_links:
            if links != {}:
                file_counter += 1
                logger.debug("EndNote file #%i (total %i)" %
                             (file_counter, papers_count))
                if links.get("EndNote"):
                    paper_EndNote_data = get_info_from_EndNote(
                        links["EndNote"], True)
                else:
                    # No EndNote link usually means the Scholar UI is not
                    # in English — abort with an explanatory message.
                    settings.print_message(
                        'Error getting EndNote files. '
                        'Please change the display settings Google Scholar in English '
                        '(https://scholar.google.com/).')
                    logger.debug(
                        'End work programme because did not find link to EndNote file.'
                    )
                    raise Exception('Did not find EndNote.')
                if paper_EndNote_data is None:
                    logger.debug(
                        "Skip EndNote file #%i, could not upload file." %
                        file_counter)
                    continue
                if not "year" in paper_EndNote_data or not "author" in paper_EndNote_data:
                    # Records without year or authors are not informative
                    # enough to merge reliably; skip them.
                    logger.debug(
                        "Skip EndNote file #%i, empty year or authors fields."
                        % file_counter)
                else:
                    similar_papers = intersect_papers(paper_EndNote_data,
                                                      EndNote_list)
                    if similar_papers == []:
                        # A previously unseen paper version — keep it.
                        merged_counter += 1
                        logger.debug(
                            "EndNote file #%i miss all EndNote files in merged array."
                            % file_counter)
                        logger.debug("Add EndNote file #%i in merged array." %
                                     file_counter)
                        paper_EndNote_data.update({
                            "url_scholarbib":
                            links["EndNote"],
                            "citedby":
                            links["citedby"] if "citedby" in links else None
                        })
                        EndNote_list.append(paper_EndNote_data)
                    else:
                        # Duplicate of an existing record: keep whichever
                        # version carries more fields.
                        similar_file = similar_papers[0]
                        similar_file_index = EndNote_list.index(similar_file)
                        if len(similar_file) < len(paper_EndNote_data):
                            logger.debug(
                                "EndNote file #{0} like #{1} EndNote file in merged array and has more fields, replace."
                                .format(file_counter, similar_file_index + 1))
                            EndNote_list[
                                similar_file_index] = paper_EndNote_data
                        else:
                            logger.debug(
                                "EndNote file #{0} like #{1} EndNote file in merged array, skipped."
                                .format(file_counter, similar_file_index + 1))
        # NEXT button on html page
        if soup.find(class_='gs_ico gs_ico_nav_next'):
            url = soup.find(
                class_='gs_ico gs_ico_nav_next').parent['href'].strip()
            logger.debug("Load next page in resulting query selection.")
            soup = utils.get_soup(_FULLURL.format(_HOST, url))
        else:
            break
    if merged_counter == 0:
        logger.debug(
            "All %i EndNote files in the cluster are not informative. No merged files."
            % file_counter)
    else:
        logger.debug(
            "All {0} EndNote files merged in {1} (i.e. distinct versions in cluster: {1}):"
            .format(file_counter, merged_counter))
        for counter, data in enumerate(EndNote_list):
            logger.debug("Merged EndNote file #%i:\n%s" %
                         (counter + 1, data["EndNote"]))
    return tuple(EndNote_list)
Esempio n. 32
0
def _vk_user_label(uid, info):
    """Build the multi-line profile label shown on a graph node.

    Any profile field missing from *info* is rendered as "----",
    matching the label layout used throughout the graph.
    """
    def _field(key, render=None):
        # Render one optional profile field, defaulting to "----".
        if key not in info:
            return "----"
        value = info[key]
        return render(value) if render else value

    return ("ID: {} \nФИГ: {} {} \nНик: {} \nПол: {} "
            "\nДата рождения: {} \nГород: {} \nСтрана: {}").format(
                uid,
                _field("first_name"),
                _field("last_name"),
                _field("nickname"),
                _field("sex", lambda s: utils.SEX[s]),
                _field("bdate"),
                _field("city"),
                _field("country"))


def _add_vk_user_node(graph, uid, info):
    """Download the user's avatar and add a styled node to *graph*.

    *info* is the raw VK profile dict for user *uid*; it must contain
    "photo_200_orig" (both call sites request that field explicitly).
    """
    logger.debug("User info='{}'.".format(json.dumps(info)))
    logger.debug("Load user photo (id={}).".format(uid))
    photo = utils.get_request(info["photo_200_orig"])
    if not photo:
        logger.debug("Can't loading user photo (id={}).".format(uid))
    graph.add_node(uid,
                   check_existance=False,
                   label=_vk_user_label(uid, info),
                   shape="roundrectangle",
                   font_style="italic",
                   underlined_text="false",
                   img=photo,
                   width="200",
                   height="200",
                   border_has_color="false")


def get_friends_graph():
    """ Get friends for users and create social graph.

    Performs a breadth-first walk of VK friendships starting from
    settings.PARAMS["user_id"], settings.PARAMS["levels"] levels deep.
    Each visited user becomes a node (avatar + profile label) and each
    friendship an edge; the finished graph is written as graphml to
    settings.OUTPUT_FILE.

    Returns a tuple (status_text, total_users, count_graph_users,
    count_bad_users).
    """
    total_users = 0
    count_bad_users = 0
    count_graph_users = 0

    result = "was successful"

    settings.print_message(
        "Create VK session for application (app_id={})".format(
            settings.VK_APPLICATION_ID))
    logger.debug("Create VK session for application (app_id={}).".format(
        settings.VK_APPLICATION_ID))
    try:
        session = vk.Session(access_token=settings.VK_ACCESS_TOKEN)
        api = vk.API(session)
    except Exception:
        logger.warning(traceback.format_exc())
        settings.print_message(
            "Can't create VK session for application with app_id={}".format(
                settings.VK_APPLICATION_ID))
        return ("with error.", 0, 0, 0)
    graph = yEdGraph.Graph()
    level_queue = [int(settings.PARAMS["user_id"])]
    count_levels = int(settings.PARAMS["levels"])

    for step in range(count_levels):
        # BUG FIX: the current level size must be computed BEFORE the
        # progress messages below use it; the original referenced
        # level_counter before assignment, raising NameError on the
        # first iteration.
        level_counter = len(level_queue)
        settings.print_message("Process level #{} (total users {})".format(
            step, level_counter))
        logger.debug("Process level #{} (total users {})".format(
            step, level_counter))
        for user_index in range(level_counter):
            uid = level_queue[user_index]
            total_users += 1
            settings.print_message(
                "Process id {}. User #{} on level #{} (total {})".format(
                    uid, user_index, step, level_counter), 2)
            logger.debug(
                "Process id {}. User #{} on level #{} (total {}).".format(
                    uid, user_index, step, level_counter))
            try:
                settings.print_message("Add user node in graph.", 3)
                logger.debug("Check user (id={}) in graph".format(uid))
                if uid not in graph.nodes.keys():
                    logger.debug(
                        "Create user node in graph (id={}).".format(uid))
                    logger.debug("Get info for user (id={}).".format(uid))
                    try:
                        user_info = api.users.get(user_ids=[uid],
                                                  fields="photo_200_orig")
                    except Exception:
                        logger.warning(traceback.format_exc())
                        # Fall through to the shared "skip user" path.
                        user_info = None
                    if not user_info:
                        logger.debug(
                            "Can not get info for user (id={}), skip.".format(
                                uid))
                        settings.print_message(
                            "Can not get info for user, skip.", 3)
                        count_bad_users += 1
                        continue
                    _add_vk_user_node(graph, uid, user_info[0])
                    count_graph_users += 1
                else:
                    logger.debug(
                        "Graph contains user node (id={}).".format(uid))
                    settings.print_message(
                        "Graph already contains this user node.", 3)
                settings.print_message("Get friendlist.", 3)
                logger.debug("Get friend for user (id={}).".format(uid))
                try:
                    friends = api.friends.get(
                        user_id=uid,
                        count=1000000,
                        fields=
                        "nickname, sex, bdate, city, country, photo_200_orig")
                except Exception:
                    # Fall through to the shared "skip friendlist" path.
                    friends = None
                if not friends:
                    logger.debug("Can not get friends, skip.")
                    settings.print_message("Can not get friendlist, skip.", 3)
                    count_bad_users += 1
                    continue
                settings.print_message(
                    "Process friends (total {}, level #{}).".format(
                        len(friends), step + 1), 3)
                logger.debug("Friends count: {}".format(len(friends)))
                # Queue friends not yet present in the graph for the
                # next BFS level.
                level_queue.extend(friend["user_id"] for friend in friends
                                   if friend["user_id"] not in graph.nodes)
                logger.debug("Add node for each friend and create edges.")
                for friend_index, friend in enumerate(friends):
                    total_users += 1
                    logger.debug("Process friend #{} (id={}).".format(
                        friend_index, friend["user_id"]))
                    settings.print_message(
                        "Process friends #{} id={} (total {}, level #{}).".
                        format(friend_index, friend["user_id"], len(friends),
                               step + 1), 4)
                    settings.print_message("Add user node in graph.", 5)
                    logger.debug("Check user (id={}) in graph".format(
                        friend["user_id"]))
                    if friend["user_id"] not in graph.nodes.keys():
                        logger.debug(
                            "Create user node in graph (id={}).".format(
                                friend["user_id"]))
                        # The friends.get call above already returned the
                        # profile fields, so no extra API request is needed.
                        _add_vk_user_node(graph, friend["user_id"], friend)
                        count_graph_users += 1
                    else:
                        logger.debug(
                            "Graph contains user node (id={}).".format(
                                friend["user_id"]))
                        settings.print_message(
                            "Graph already contains this user node.", 5)
                    logger.debug("Add adge {}-{} in graph.".format(
                        friend["user_id"], uid))
                    graph.add_edge(friend["user_id"],
                                   uid,
                                   width="1.0",
                                   color="#000000",
                                   check_existance_nodes=False)
            except Exception:
                # Best-effort: one bad user must not abort the whole walk.
                logger.warning(traceback.format_exc())
                result = "with error"
        # Drop the level just processed; the remainder is the next level.
        level_queue = level_queue[level_counter:]
    logger.debug("Create graphml for graph.")
    graph.construct_graphml()
    logger.debug("Save graphml in file {}.".format(settings.OUTPUT_FILE))
    try:
        with open(settings.OUTPUT_FILE, "w",
                  encoding=settings.OUTPUT_ENCODING) as f:
            f.write(graph.get_graph())
    except Exception:
        logger.warning(traceback.format_exc())
        result = "with error"
    return (result, total_users, count_graph_users, count_bad_users)
Esempio n. 33
0
def update(self, pr_id, pr_name, br_name):
    """Synchronise one branch's commit set between GitLab and Redis.

    Fetches every commit of branch *br_name* in project *pr_id* from
    GitLab, diffs against the commits already stored in Redis, then:
    inserts new commits (linking each to its author), unlinks commits
    that disappeared from the branch, rebuilds the branch's sorted
    commit structure and refreshes the branch's contributor list.

    NOTE(review): Python 2 code — uses ``long(...)`` and relies on
    ``map()`` returning a list that is iterated more than once
    (``__co_gl_id`` feeds both ``zip`` below and ``set`` later).

    :param pr_id: project identifier (numeric or numeric string)
    :param pr_name: project name, only forwarded to ``get_commit_info``
        to read extra data from ``git log``
    :param br_name: branch name; b16-encoded to build the Redis key
    """

    # Generate pseudo-key-id
    __pr_id = "p_" + str(pr_id)
    __br_id = __pr_id + ":" + base64.b16encode(br_name)

    # Data structure for branch's collaborators
    __br_info_collaborators = set()

    # Create Redis Data structure (id + score, in this case timestamp)
    __co_br = []

    # Get all commits from specific branch (gitlab) ids + commit's info
    __co_gl_val = self.gl_instance.get_projects_repository_commits_byId(id=pr_id, ref_name=br_name)
    __co_gl_id = map(lambda x: __pr_id + ":" + x.get("id"), __co_gl_val)
    __co_gl_val = dict(zip(__co_gl_id, __co_gl_val))

    # Get all commits from specific branch (redis) ids + created_at
    __co_rd_id = []
    __co_rd_val = {}

    __prev_info = len(self.rd_instance_br_co.keys(__br_id)) > 0
    if __prev_info:
        # NOTE(review): contributors are stored as the repr of a list;
        # eval() rebuilds it. Only safe because this class wrote the
        # value itself (see hset at the bottom).
        __br_info_collaborators = set(eval(self.rd_instance_br.hgetall(__br_id).get("contributors")))
        __co_rd_val = dict(self.rd_instance_br_co.zrange(__br_id, 0, -1, withscores=True))
        __co_rd_id = __co_rd_val.keys()

    # Generate difference and intersection metadata:
    # new = only in GitLab, del = only in Redis, mod = in both
    __mt_new = list(set(__co_gl_id).difference(set(__co_rd_id)))
    __mt_del = list(set(__co_rd_id).difference(set(__co_gl_id)))
    __mt_mod = list(set(__co_gl_id).intersection(set(__co_rd_id)))

    # Fill branch's commits without deleted
    # (list comprehension used purely for its side effect)
    if __prev_info:
        [__co_br.extend([i, long(__co_rd_val[i])]) for i in __mt_mod]

    # Regenerate structure of branch: drop the sorted set so it can be
    # re-injected from __co_br further below
    if len(__mt_new) > 0 or len(__mt_del) > 0:
        self.rd_instance_br_co.delete(__br_id)

    # Update or add commits to redis
    for i in __mt_new:

        # Get commit identifier (sha) + info; strip the project prefix
        # to recover the original sha
        __co_id = i
        __co_id_org = str(__co_id).replace(__pr_id + ":", "")

        # Get email from commit and add as contributor
        # (b16-encoded email doubles as the user key)
        __co_em = __co_gl_val[__co_id].get("author_email").lower()
        __user_key = base64.b16encode(__co_em)
        __br_info_collaborators.add(__user_key)

        # Get information from gitlab or redis (cache-miss vs cache-hit)
        if len(self.rd_instance_co.keys(__co_id)) == 0:
            __co_info = __co_gl_val[__co_id]
            st_clean.commit(__co_info)

            # Get commit information from git log
            get_commit_info(pr_id, pr_name, __co_info)
            __co_info["author"] = __user_key

            # Insert commit information
            self.rd_instance_co.hmset(__co_id, __co_info)

        else:
            __co_info = self.rd_instance_co.hgetall(__co_id)

        # Set values at Redis Structure - User
        # NOTE(review): zadd(key, member, score) matches the pre-3.0
        # redis-py signature — confirm the client version.
        self.rd_instance_us_co.zadd(__user_key, __br_id + ":" + __co_id_org, long(__co_info.get("created_at")))

        # Set values at Redis Structure - Branch (id + timestamp)
        __co_br.append(__co_id)
        __co_br.append(long(__co_info.get("created_at")))

    for i in __mt_del:

        # Get commit identifier (sha) + info
        __co_id = i
        __co_id_org = str(__co_id).replace(__pr_id + ":", "")
        __co_info = self.rd_instance_co.hgetall(__co_id)

        # Get email from commit and unlink the commit from its author
        __co_em = __co_info.get("author_email").lower()
        __user_key = base64.b16encode(__co_em)
        self.rd_instance_us_co.zrem(__user_key, __br_id + ":" + __co_id_org)

    # Check if contributors keep being same: after deletions, drop any
    # contributor with no remaining commit on this branch
    if len(__mt_del) > 0:
        __br_info_collaborators_tmp = __br_info_collaborators.copy()
        for i in __br_info_collaborators:
            count_co = 0
            __br_us_co = self.rd_instance_us_co.zrange(i, 0, -1)
            for j in __br_us_co:
                if str(j).startswith(__br_id):
                    count_co = 1
                    break
            if count_co == 0:
                __br_info_collaborators_tmp.remove(i)
        __br_info_collaborators = __br_info_collaborators_tmp

    # Inject commits to branch from data structure filled
    if len(__mt_new) > 0 or len(__mt_del) > 0:
        inject.inject_branch_commits(self.rd_instance_br_co, pr_id, br_name, __co_br)

    # Insert information to branch
    self.rd_instance_br.hset(__br_id, "contributors", list(__br_info_collaborators))

    if len(__mt_new) > 0:

        # Print alert
        if config.DEBUGGER:
            config.print_message("* (%d) Added %d Commits" % (int(pr_id), len(__mt_new)))

    if len(__mt_del) > 0:

        # Print alert
        if config.DEBUGGER:
            config.print_message("* (%d) Deleted %d Commits" % (int(pr_id), len(__mt_del)))
Esempio n. 34
0
def _get_info_from_resulting_selection(paper_soup, handling_cluster=False):
    """retrieving data about an article in the resulting selection

    Parses one Google Scholar result entry (*paper_soup*) and returns a
    dict with:
      - "general_information": title, url, author list, year and (when
        the entry has a versions link) the cluster id;
      - "link_to_pdf": direct PDF url if the entry exposes one;
      - "different_information": per-version data (EndNote fields,
        citation count) — from the whole cluster when
        *handling_cluster* is True and a cluster exists, otherwise from
        this single entry.

    :param paper_soup: BeautifulSoup tag for one search result
    :param handling_cluster: merge info from every version in the
        paper's cluster when available
    """
    # Full info about paper include general and addition information
    # MAYBE no one addition information, because this paper in cluster
    # and for each paper from cluster contains additional info
    settings.print_message("Google scholar:", 2)
    settings.print_message("Get general information.", 3)
    full_info = dict()
    general_information = dict()
    databox = paper_soup.find('div', class_='gs_ri')
    title = databox.find('h3', class_='gs_rt')
    if title.find('span', class_='gs_ct'):  # A citation
        title.span.extract()
    elif title.find('span', class_='gs_ctc'):  # A book or PDF
        title.span.extract()
    general_information['title'] = title.text.strip()
    if title.find('a'):
        general_information['url'] = title.find('a')['href'].strip()

    paperinfo = databox.find('div', class_='gs_a')
    author_list = list()
    author_ref_list = paperinfo('a')
    ref_index = 0
    ref_list_len = len(author_ref_list)
    # Pair each short author name with its profile link (GID) when the
    # name matches the next anchor in document order.
    for auth_shortname in paperinfo.text.split("-")[0].split(","):
        GID = ""
        auth_shortname = auth_shortname.strip(" …\xa0")
        if ref_list_len > ref_index and auth_shortname == author_ref_list[
                ref_index].text:
            GID = re.findall(_CITATIONAUTHRE,
                             author_ref_list[ref_index]['href'].strip())[0]
            ref_index += 1
        author_list.append({"shortname": auth_shortname, "gid": GID})
    general_information['author'] = author_list
    year = re.findall("[0-9]{4}", paperinfo.text)

    if len(year) != 0:
        general_information['year'] = int(year[0])

    # Save general info
    full_info["general_information"] = general_information
    settings.print_message("Title: '%s'" % general_information['title'], 3)
    # Get addition information (maybe paper in cluster then analysis cluster
    # and get additional info for each unique paper in cluster)
    footer_links = databox.find('div', class_='gs_fl').find_all('a')
    settings.print_message("Get additional information.", 3)

    count_sim_papers = 0
    for link in footer_links:
        if 'versions' in link.text or 'версии статьи' in link.text:
            count_sim_papers = int(re.findall(r'\d+', link.text.strip())[0])
            logger.debug("In cluster %i papers." % count_sim_papers)
            general_information["cluster"] = int(
                re.findall(r'\d+', link['href'].strip())[0])
            break

    # check: have paper link to pdf
    # and take this link if exists
    link_to_pdf = _get_url_pdf(paper_soup)
    full_info['link_to_pdf'] = link_to_pdf

    # CLUSTER HANDLER
    # BUG FIX: "cluster" is only present when a versions link was found
    # above; use .get() so papers without a cluster don't raise KeyError.
    if handling_cluster and general_information.get("cluster") is not None:
        settings.print_message(
            "In cluster %i similar papers." % count_sim_papers, 3)
        settings.print_message("Cluster handling...", 3)
        different_information = _cluster_handler(
            general_information["cluster"], count_sim_papers)
        if different_information is not None:
            full_info["different_information"] = different_information
            settings.print_message(
                "Versions in cluster: %i." % len(different_information), 3)
            return full_info

    # Paper not in cluster => get addition info for it
    if handling_cluster:
        settings.print_message("Cluster link not exists.", 3)
    else:
        settings.print_message("Don't use google cluster.", 3)
    different_information = list()
    different_information.append(dict())
    is_end_note = False
    for link in footer_links:
        if 'endnote' in link.text.strip().lower():
            is_end_note = True
            end_note = get_info_from_EndNote(link['href'].strip(), True)
            if end_note is not None:
                different_information[0].update(end_note)
            different_information[0]["url_scholarbib"] = link['href'].strip()
        if 'Cited by' in link.text or 'Цитируется' in link.text:
            #utils.get_soup(_HOST + link['href'].strip())
            different_information[0]["citedby"] = int(
                re.findall(r'\d+', link.text)[0])
    if not is_end_note:
        # Without an EndNote link no bibliographic data can be scraped;
        # pause so the operator can fix the Scholar display settings.
        settings.print_message(
            'Error getting EndNote files. '
            'Please change the display settings Google Scholar in English '
            '(https://scholar.google.com/).')
        logger.debug(
            'End work programme because did not find link to EndNote file.')
        input('Press enter to continue')

        #raise Exception('Did not find EndNote.')
    full_info["different_information"] = tuple(different_information)
    return full_info
def save_json_metadata(metadata):
    """Serialize *metadata* as JSON to generated/data/metadata.json.

    :param metadata: any json-serializable object
    """
    settings.print_message(" - Saving metadata at json format ... ")
    # Context manager guarantees the handle is closed even if
    # serialization fails (the original leaked it on error).
    with open("generated/data/metadata.json", "w") as f:
        json.dump(metadata, f)
def upload_package():
    """Build and upload the generated package to PyPI.

    Runs ``setup.py sdist register upload`` from inside the
    ``generated`` directory.
    """
    settings.print_message(" - Uploading pypi package ... ")
    # Restore the original working directory even when the upload
    # fails — the original left the process chdir'ed into "generated"
    # if call() raised.
    cwd = os.getcwd()
    os.chdir("generated")
    try:
        call(["python", "setup.py", "sdist", "register", "upload", "-r", "pypi"])
    finally:
        os.chdir(cwd)
Esempio n. 37
0
def update(self, pr_id, pr_name, br_name):
    """Keep Redis in sync with the commits of one GitLab branch.

    Pulls the branch's full commit list from GitLab, computes the
    new / deleted / unchanged sets against Redis, stores new commits
    (linked to their author), unlinks removed ones, rebuilds the
    branch's sorted commit structure and rewrites the branch's
    contributor list.

    NOTE(review): Python 2 code — ``long(...)`` and a ``map()`` result
    (``__co_gl_id``) that is iterated twice would both break on
    Python 3.

    :param pr_id: project identifier (numeric or numeric string)
    :param pr_name: project name, only forwarded to ``get_commit_info``
    :param br_name: branch name; b16-encoded to build the Redis key
    """

    # Generate pseudo-key-id
    __pr_id = "p_" + str(pr_id)
    __br_id = __pr_id + ":" + base64.b16encode(br_name)

    # Data structure for branch's collaborators
    __br_info_collaborators = set()

    # Create Redis Data structure (id + score, in this case timestamp)
    __co_br = []

    # Get all commits from specific branch (gitlab) ids + commit's info
    __co_gl_val = self.gl_instance.get_projects_repository_commits_byId(
        id=pr_id, ref_name=br_name)
    __co_gl_id = map(lambda x: __pr_id + ":" + x.get("id"), __co_gl_val)
    __co_gl_val = dict(zip(__co_gl_id, __co_gl_val))

    # Get all commits from specific branch (redis) ids + created_at
    __co_rd_id = []
    __co_rd_val = {}

    __prev_info = len(self.rd_instance_br_co.keys(__br_id)) > 0
    if __prev_info:
        # NOTE(review): the contributor list is stored as a repr'd
        # Python list; eval() restores it. Acceptable only because the
        # value is produced by this class itself (hset below).
        __br_info_collaborators = set(
            eval(self.rd_instance_br.hgetall(__br_id).get("contributors")))
        __co_rd_val = dict(
            self.rd_instance_br_co.zrange(__br_id, 0, -1, withscores=True))
        __co_rd_id = __co_rd_val.keys()

    # Generate difference and intersection metadata:
    # new = GitLab only, del = Redis only, mod = present in both
    __mt_new = list(set(__co_gl_id).difference(set(__co_rd_id)))
    __mt_del = list(set(__co_rd_id).difference(set(__co_gl_id)))
    __mt_mod = list(set(__co_gl_id).intersection(set(__co_rd_id)))

    # Fill branch's commits without deleted
    # (list comprehension used purely for its side effect)
    if __prev_info:
        [__co_br.extend([i, long(__co_rd_val[i])]) for i in __mt_mod]

    # Regenerate structure of branch: drop the sorted set so it can be
    # re-injected from __co_br at the end
    if len(__mt_new) > 0 or len(__mt_del) > 0:
        self.rd_instance_br_co.delete(__br_id)

    # Update or add commits to redis
    for i in __mt_new:

        # Get commit identifier (sha) + info; strip the project prefix
        # to recover the original sha
        __co_id = i
        __co_id_org = str(__co_id).replace(__pr_id + ":", "")

        # Get email from commit and add as contributor
        # (b16-encoded email doubles as the user key)
        __co_em = __co_gl_val[__co_id].get('author_email').lower()
        __user_key = base64.b16encode(__co_em)
        __br_info_collaborators.add(__user_key)

        # Get information from gitlab or redis (cache-miss vs cache-hit)
        if len(self.rd_instance_co.keys(__co_id)) == 0:
            __co_info = __co_gl_val[__co_id]
            st_clean.commit(__co_info)

            # Get commit information from git log
            get_commit_info(pr_id, pr_name, __co_info)
            __co_info["author"] = __user_key

            # Insert commit information
            self.rd_instance_co.hmset(__co_id, __co_info)

        else:
            __co_info = self.rd_instance_co.hgetall(__co_id)

        # Set values at Redis Structure - User
        # NOTE(review): zadd(key, member, score) matches the pre-3.0
        # redis-py signature — confirm the client version.
        self.rd_instance_us_co.zadd(__user_key, __br_id + ":" + __co_id_org,
                                    long(__co_info.get("created_at")))

        # Set values at Redis Structure - Branch (id + timestamp)
        __co_br.append(__co_id)
        __co_br.append(long(__co_info.get("created_at")))

    for i in __mt_del:

        # Get commit identifier (sha) + info
        __co_id = i
        __co_id_org = str(__co_id).replace(__pr_id + ":", "")
        __co_info = self.rd_instance_co.hgetall(__co_id)

        # Get email from commit and unlink the commit from its author
        __co_em = __co_info.get('author_email').lower()
        __user_key = base64.b16encode(__co_em)
        self.rd_instance_us_co.zrem(__user_key, __br_id + ":" + __co_id_org)

    # Check if contributors keep being same: after deletions, drop any
    # contributor with no remaining commit on this branch
    if len(__mt_del) > 0:
        __br_info_collaborators_tmp = __br_info_collaborators.copy()
        for i in __br_info_collaborators:
            count_co = 0
            __br_us_co = self.rd_instance_us_co.zrange(i, 0, -1)
            for j in __br_us_co:
                if str(j).startswith(__br_id):
                    count_co = 1
                    break
            if count_co == 0:
                __br_info_collaborators_tmp.remove(i)
        __br_info_collaborators = __br_info_collaborators_tmp

    # Inject commits to branch from data structure filled
    if len(__mt_new) > 0 or len(__mt_del) > 0:
        inject.inject_branch_commits(self.rd_instance_br_co, pr_id, br_name,
                                     __co_br)

    # Insert information to branch
    self.rd_instance_br.hset(__br_id, 'contributors',
                             list(__br_info_collaborators))

    if len(__mt_new) > 0:

        # Print alert
        if config.DEBUGGER:
            config.print_message("* (%d) Added %d Commits" %
                                 (int(pr_id), len(__mt_new)))

    if len(__mt_del) > 0:

        # Print alert
        if config.DEBUGGER:
            config.print_message("* (%d) Deleted %d Commits" %
                                 (int(pr_id), len(__mt_del)))