Esempio n. 1
0
def getLinksFromHistory() -> list:
    """
    Get file links from links_history file.

    The history file is ``links_history.txt``, located in the parent of the
    directory returned by ``credentials.getCommitteesDirectory()``. Each line
    is stripped of surrounding whitespace and blank lines are skipped.

    Returns
    -------
    list
        A list of links, in the order they appear in the file.

    Raises
    ------
    OSError
        If the history file cannot be opened (for example, it does not exist).
    """
    import os  # function-scope import keeps this block self-contained

    committee_folder = credentials.getCommitteesDirectory()

    # os.path.join works whether or not the configured directory ends with a
    # separator; plain string concatenation only works when it does.
    links_history_file = os.path.join(committee_folder, "..", "links_history.txt")

    with open(links_history_file, "r") as history_file:
        stripped_lines = (line.strip() for line in history_file)
        return [link for link in stripped_lines if link]
Esempio n. 2
0
def buildCommittees():
    """
    Create a folder for every committee and place the folder inside
    the directory returned by ``credentials.getCommitteesDirectory()``.

    Missing parent directories are created as needed, and folders that
    already exist are left untouched.
    """
    committee_names = [
        "Accounting Issues Committee",
        "Best Practices Committee",
        "Audit Committee",
        "Corporate Governance",
        "Finance Committee",
        "Bylaws Committee",
        "Communications Committee",
        "Coordinating Committee Chairs Committee",
        "Core Services Committee",
        "Education Committee",
        "Information Systems Committee",
        "IT Advisory and Governance",
        "Legal Committee",
        "Member Committee Advisory Committee",
        "NCIGF Services Committee",
        "Nominating Committee",
        "Operations Committee",
        "Public Policy Committee",
        "Site Selection Committee",
        "Special Funding Committee",
        "NCIGF Board",
    ]

    committee_directory = Path(credentials.getCommitteesDirectory())

    for committee_name in committee_names:
        # exist_ok takes a bool; the string "True" only worked because any
        # non-empty string is truthy.
        (committee_directory / committee_name).mkdir(parents=True, exist_ok=True)
Esempio n. 3
0
def cleanCommitteesFolder():
    """
    Empty the committees directory so the script has a fresh start.

    Files located directly inside the committees directory are deleted. Each
    committee subfolder itself is kept, but every file and directory inside
    it is removed. Nothing happens when the committees directory does not
    exist.

    Failures while removing an entry inside a committee subfolder are logged
    and the clean-up continues with the next entry.
    """
    committees_directory = credentials.getCommitteesDirectory()

    if not os.path.exists(committees_directory):
        return None

    for committee_folder in os.listdir(committees_directory):

        committee_folder_path = os.path.join(committees_directory, committee_folder)

        # Stray files at the top level are removed outright.
        if os.path.isfile(committee_folder_path):
            os.unlink(committee_folder_path)
            continue

        for committee_file_name in os.listdir(committee_folder_path):

            committee_file_path = os.path.join(committee_folder_path, committee_file_name)

            try:
                if os.path.isfile(committee_file_path):
                    os.unlink(committee_file_path)
                elif os.path.isdir(committee_file_path):
                    shutil.rmtree(committee_file_path)
            except OSError as error:
                # Only filesystem errors are expected here; a bare except
                # would also swallow KeyboardInterrupt and programming errors.
                logger.error("Could not clean old directory. " + str(error))
Esempio n. 4
0
def deleteHistory():
    """
    Delete the links_history archive file.

    The file is ``links_history.txt`` in the parent of the directory returned
    by ``credentials.getCommitteesDirectory()``. A missing file is not an
    error.
    """

    committee_folder = credentials.getCommitteesDirectory()

    # os.path.join works whether or not the configured directory ends with a
    # separator; plain string concatenation only works when it does.
    links_history_file = os.path.join(committee_folder, "..", "links_history.txt")

    try:
        os.unlink(links_history_file)
    except FileNotFoundError:
        # Nothing to delete; attempting the unlink directly avoids the race
        # between an existence check and the removal.
        pass
Esempio n. 5
0
def saveLinksFromTaxonomy(links:list):
    """
    Append links to the links_history file, one link per line.

    The file is ``links_history.txt`` in the parent of the directory returned
    by ``credentials.getCommitteesDirectory()``. It is opened in append mode,
    so existing entries are preserved.

    Parameters
    ----------
    links : list
        A list of links (strings).

    """
    import os  # function-scope import keeps this block self-contained

    committee_folder = credentials.getCommitteesDirectory()

    # os.path.join works whether or not the configured directory ends with a
    # separator; plain string concatenation only works when it does.
    links_history_file = os.path.join(committee_folder, "..", "links_history.txt")

    with open(links_history_file, "a") as history_file:
        history_file.writelines(link + "\n" for link in links)
Esempio n. 6
0
def downloadFile(file_href:str):
    """
    Download the file behind a link into the committees directory.

    The local file name is the last "/"-separated piece of the link. Links
    whose file name contains "draft" (case-insensitive) are skipped without
    making a request.

    Args:
        file_href (str): Link to the file to download.

    Returns:
        str: Path of the downloaded file, or None when the file was skipped,
        the request failed, the file could not be written, or the download
        was 1 byte or smaller.
    """
    file_name = file_href.split("/")[-1]

    # Reject drafts before touching the network so no request is wasted.
    if "draft" in file_name.lower():
        logger.warning("Download failed. Ignoring files with 'draft' in the name.")
        return None

    local_path = os.path.join(credentials.getCommitteesDirectory(), file_name)

    try:
        file_request = session.get(file_href, allow_redirects=True, stream=True)
    except urllib3.util.ssl_.SSLError:
        # NOTE(review): if `session` is a requests.Session, SSL failures may
        # surface as requests.exceptions.SSLError instead - confirm.
        logger.error("Download failed. Something went wrong with requesting the file.")
        # Without a response there is nothing to write; continuing would hit
        # an unbound `file_request`.
        return None

    try:
        with open(local_path, mode="wb") as local_file:
            for file_chunk in file_request.iter_content(chunk_size=1024):
                if file_chunk:
                    local_file.write(file_chunk)

        # Measure only after the file is closed: while it is open, buffered
        # data may not have reached the disk and the size can read as 0.
        downloaded_size = os.path.getsize(local_path)
    except OSError:
        logger.warning("Failed to move file to committee. " + local_path)
        return None
    finally:
        # Release the streamed connection on every path.
        file_request.close()

    if downloaded_size > 1:
        return local_path

    logger.warning("Downloaded file will not be moved due to its tiny size.")
    return None
Esempio n. 7
0
def organizeFile(file_path:str):
    """
    Download a file, extract its text and file both under a committee folder.

    The committee is picked from keywords in the file name ("aic", "bod",
    "directors", "mac", "communication"); when none match, the extracted text
    lines are handed to ``determineCommittee``. On success the extracted text
    is written as ``<name>.txt`` and the downloaded file is moved to
    ``<name>.pdf`` (whatever its original extension) inside the committee
    folder.

    Args:
        file_path (str): Link of the file to download and organize.

    Returns:
        str: Path of the written ``.txt`` file, or None when the download
        failed, the file has no extension or a rejected one, text extraction
        failed, or no committee could be determined.
    """
    local_file_path = downloadFile(file_path)

    if local_file_path is None:
        return None

    local_file_name = local_file_path.split("/")[-1]
    local_file_pieces = local_file_name.split(".")

    if len(local_file_pieces) < 2:
        logger.warning(local_file_name + " Couldn't find file extension")
        return None

    # NOTE(review): the stem is everything before the FIRST dot, so
    # "a.b.pdf" and "a.c.pdf" both map to "a" - confirm this is intended.
    local_file_name_no_extension = local_file_pieces[0]
    local_file_name_extension = local_file_pieces[-1]

    invalid_extensions = ["msg", "doc"]

    # Lower-case the file's own extension so "DOC"/"MSG" are rejected too.
    # The substring match is kept, so e.g. "docx" is still rejected.
    lowered_extension = local_file_name_extension.lower()

    if any(extension in lowered_extension for extension in invalid_extensions):
        logger.warning(local_file_name + " [" + local_file_name_extension + "] Not a valid file format")
        return None

    try:
        processed_text = textract.process(local_file_path)
    except Exception:
        logger.warning(local_file_name + " couldn't be processed into text")
        return None

    lowered_file_name = local_file_name.lower()

    if "aic" in lowered_file_name:
        committee_name = "Accounting Issues Committee/"
    elif "bod" in lowered_file_name or "directors" in lowered_file_name:
        committee_name = "NCIGF Board/"
    elif "mac" in lowered_file_name:
        committee_name = "Member Committee Advisory Committee/"
    elif "communication" in lowered_file_name:
        committee_name =  "Communications Committee/"
    else:
        # Fall back to inspecting the extracted text line by line.
        committee_name = determineCommittee(processed_text.splitlines())

    if committee_name is None:
        logger.warning(local_file_name + " Could not determine committee")
        return None

    committee_directory = credentials.getCommitteesDirectory()
    destination_stem = committee_directory + committee_name + local_file_name_no_extension
    new_file_path_txt = destination_stem + ".txt"
    new_file_path_pdf = destination_stem + ".pdf"

    # Opened in binary mode, so processed_text is presumably bytes.
    with open(new_file_path_txt, "wb") as new_file:
        new_file.write(processed_text)

    # Move the downloaded original next to its text version.
    os.rename(local_file_path, new_file_path_pdf)

    return new_file_path_txt