def aggregateData(self):
        """Merge the first sheet of every workbook in the inserted-data folder into one CSV.

        The output is written to ``data.csv`` inside the directory reported by
        ``DirManager(['aggregated_data'])``. Input files are taken from
        ``self.insertCandidateFolder`` (which must already be set on the
        instance) and processed in ascending modification-time order. Row 0
        of each sheet is treated as the header and is copied only from the
        first workbook; every field is quoted (``csv.QUOTE_ALL``).
        """
        # Create new directory for storing aggregated data
        aggregateFolder = DirManager(['aggregated_data'])
        aggregateFolder.createFolder()
        new_csv_file = '{}/data.csv'.format(aggregateFolder.getDirectory())

        insertColumsFolder = self.insertCandidateFolder.getDirectory()
        filenames = sorted(
            (insertColumsFolder + "/" + f for f in listdir(insertColumsFolder)),
            key=path.getmtime)

        # The csv module requires newline='' on the file object; without it
        # the writer's own "\r\n" terminators get translated again and every
        # row is followed by a blank line on Windows.
        with open(new_csv_file, 'w', newline='') as new_aggregate_csv:
            new_worksheet = csv.writer(new_aggregate_csv,
                                       quoting=csv.QUOTE_ALL)

            # Loop through all workbooks (EXCEL)
            header_written = False
            for filename in filenames:
                sheet = xlrd.open_workbook(filename).sheet_by_index(0)

                # Only pull the header row from the first file to avoid
                # repeating it once per workbook.
                first_row = 1 if header_written else 0
                for rownum in range(first_row, sheet.nrows):
                    new_worksheet.writerow(sheet.row_values(rownum))
                header_written = True
Example #2
0
 def test_empty_dir(self):
     """list_available() returns a dict after adding a directory that holds two empty files."""
     import shutil  # local import: only needed to clean up the scratch directory

     dirManager = DirManager()
     tmpdir = tempfile.mkdtemp()
     # mkdtemp() never deletes what it creates; remove it whether the test
     # passes or fails so repeated runs do not pile up temp directories.
     self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True)
     for name in ("test", "other"):
         open(os.path.join(tmpdir, name), "w").close()
     dirManager.add_directory(tmpdir)
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(dirManager.list_available(), dict)
Example #3
0
    def insertCandidates(self, numDownloads, CandidateName):
        """Prepend a candidate-name column to the most recently modified downloads.

        The ``numDownloads`` newest files (by modification time) in
        ``self.download_dir`` are read as Excel workbooks, given a leading
        ``CandidateControlledName`` column, and written under the same file
        name into the directory reported by
        ``DirManager(['insertCandidateControlled'])``.

        Args:
            numDownloads: How many of the newest files to process. ``0`` is a
                no-op.
            CandidateName: Value for the new column. The three-space string
                ``"   "`` is recorded as ``"Independent"``.
        """
        print('Processing {} for {}'.format(numDownloads, CandidateName))

        # filenames[-0:] is the whole list, so without this guard a request
        # for zero files would reprocess every file in the download folder.
        if numDownloads == 0:
            return

        insertCandidateFolder = DirManager(['insertCandidateControlled'])
        insertCandidateFolder.createFolder()
        new_folder = insertCandidateFolder.getDirectory()

        filenames = sorted(
            [self.download_dir + "/" + f for f in listdir(self.download_dir)],
            key=path.getmtime)
        candidateHeader = "CandidateControlledName"

        # Columns forced to str when reading; built once instead of per file.
        errordTypes = [
            'Cmte_ID', 'Intr_Nam L', 'Intr_City', 'Intr_ST', 'Off_S_H_Cd',
            'XRef_Match'
        ]
        string_columns = {datatype: str for datatype in errordTypes}
        candidate_value = ("Independent"
                           if CandidateName == "   " else CandidateName)

        for fullfilepathname in filenames[-numDownloads:]:
            filename = path.basename(fullfilepathname)

            # xlrd's log output is discarded; the sink is closed as soon as
            # the workbook has been read instead of leaking one handle per file.
            with open(devnull, 'w') as xlrd_log:
                wb = xlrd.open_workbook(fullfilepathname, logfile=xlrd_log)
                data = pd.read_excel(wb, dtype=string_columns)

            data.insert(0, candidateHeader, candidate_value)
            data.to_excel('{}/{}'.format(new_folder, filename), index=False)
Example #4
0
 def __init__(self,
              path,
              prefix,
              cauldronAddr,
              ignore=None):
     """Open the syslog channel and wire up the directory manager and cauldron sender.

     Args:
         path: Stored as ``self.path`` and passed to ``DirManager``.
         prefix: Stored as ``self.prefix``.
         cauldronAddr: Stored as ``self.cauldronAddr`` and passed to
             ``CauldronSender``.
         ignore: List of regex pattern strings forwarded to ``DirManager``
             as its ``ignore`` argument. When omitted, a fresh copy of the
             default pattern list below is used.
     """
     # A list literal in the signature would be shared by every call that
     # relies on the default; build a new one per instance instead.
     if ignore is None:
         ignore = [
             r'^\.', r'\.x?swp$', r'~', r'^__', r'__$', r'\.jar$',
             r'\.db$'
         ]
     syslog.openlog(ident="X3Coven", facility=syslog.LOG_DAEMON)
     self.path = path
     self.prefix = prefix
     self.cauldronAddr = cauldronAddr
     self.plugins = {}
     self.plugins_lock = Lock()
     # Matches comment lines of the form "# x3.<dotted.key> = <value>";
     # group 1 is the key, group 2 the value. Written as a raw string so
     # "\." is not an invalid string escape (same pattern text as before).
     self.confRE = re.compile(r'^#\s*x3\.([a-z0-9.]+)\s*=\s*(.*)\s*$')
     self.dirManager = DirManager(self.path, self, ignore=ignore)
     self.cauldron = CauldronSender(self.cauldronAddr)
Example #5
0
    def __init__(self):
        """Prepare the download folder and start a Chrome WebDriver for file retrieval.

        Sets the default sleep time and search-form URL, creates the
        ``data`` folder through ``DirManager``, attaches a ``PreProcessing``
        instance to the website object, and launches Chrome with its
        download directory pointed at that folder.
        """
        self.DEFAULT_SLEEP_TIME = 5
        self.SEARCH_FORM_ADDRESS = "https://www.southtechhosting.com/SanJoseCity/CampaignDocsWebRetrieval/Search/SearchByElection.aspx"

        # Downloads land in a "data" folder managed by DirManager.
        self.website = SjcWebsite()
        self.new_dir = DirManager(["data"])
        self.new_dir.createFolder()
        self.download_dir = self.new_dir.getDirectory()

        self.website.preprocessing = PreProcessing(self.download_dir)

        chrome_options = webdriver.ChromeOptions()

        # Headless mode is opt-in through the HEADLESS environment variable;
        # any non-empty value (even "0" or "false") switches it on.
        # Original author's note: headless is not fully working and only
        # downloads the first link on the form table.
        if os.environ.get('HEADLESS', False):
            chrome_options.add_argument("--headless")

        for flag in (
                "--ignore-certificate-errors",
                "--test_type",
                "--no-sandbox",
                "start-maximized",
                "disable-infobars",
                "--disable-extensions",
        ):
            chrome_options.add_argument(flag)

        # Save files straight into the download folder without prompting,
        # with the built-in PDF viewer plugin marked as disabled.
        chrome_options.add_experimental_option(
            "prefs", {
                "download.default_directory": self.download_dir,
                "download.prompt_for_download": False,
                "download.directory_upgrade": True,
                "safebrowsing.enabled": False,
                "safebrowsing.disable_download_protection": True,
                "plugins.plugins_list": [{
                    "enabled": False,
                    "name": "Chrome PDF Viewer"
                }],
            })

        driver_path = ChromeDriverManager().install()
        self.driver = webdriver.Chrome(driver_path, options=chrome_options)
    def insertColumns(self, numDownloads, CandidateName, ElectionDate,
                      BallotItem):
        """Prepend candidate, election-date and ballot-item columns to recent downloads.

        The last ``numDownloads`` entries of the list returned by
        ``self.insertColumnsHelper()`` are read as Excel workbooks, extended
        with three leading columns, and written under the same file name
        into the directory reported by ``DirManager(['insertedData'])``. That
        manager is also stored on the instance as
        ``self.insertCandidateFolder``.

        Args:
            numDownloads: How many trailing files to process. ``0`` returns
                immediately, before any folder is created.
            CandidateName: Value for ``CandidateControlledName``. The
                three-space string ``"   "`` is recorded as ``"Independent"``.
            ElectionDate: Value for the ``Election Date`` column.
            BallotItem: Value for the ``Ballot Item`` column.
        """
        print('Processing {} for {}'.format(numDownloads, CandidateName))

        if numDownloads == 0:
            return

        self.insertCandidateFolder = DirManager(['insertedData'])
        self.insertCandidateFolder.createFolder()
        new_folder = self.insertCandidateFolder.getDirectory()
        filenames = self.insertColumnsHelper()

        candidateHeader = "CandidateControlledName"
        electionDateHeader = "Election Date"
        ballotItemHeader = "Ballot Item"

        # Columns forced to str when reading; built once instead of per file.
        errordTypes = [
            'Cmte_ID', 'Intr_Nam L', 'Intr_City', 'Intr_ST', 'Off_S_H_Cd',
            'XRef_Match'
        ]
        string_columns = {datatype: str for datatype in errordTypes}
        candidate_value = ("Independent"
                           if CandidateName == "   " else CandidateName)

        print(filenames)
        for fullfilepathname in filenames[-numDownloads:]:
            filename = path.basename(fullfilepathname)
            print(filename)

            # xlrd's log output is discarded; the sink is closed as soon as
            # the workbook has been read instead of leaking one handle per file.
            with open(devnull, 'w') as xlrd_log:
                wb = xlrd.open_workbook(fullfilepathname, logfile=xlrd_log)
                data = pd.read_excel(wb, dtype=string_columns)

            # Each insert goes to position 0, so the final leading order is
            # Ballot Item, Election Date, CandidateControlledName.
            data.insert(0, candidateHeader, candidate_value)
            data.insert(0, electionDateHeader, ElectionDate)
            data.insert(0, ballotItemHeader, BallotItem)

            data.to_excel('{}/{}'.format(new_folder, filename), index=False)
Example #7
0
 def test_found_entry(self):
     """found_entry(dot) returns the expected path string once test_dir is added."""
     dirManager = DirManager()
     dirManager.add_directory(test_dir)
     # assertEqual shows both values on failure, unlike assertTrue(a == b).
     self.assertEqual(dirManager.found_entry(dot),
                      "../test_files/DOt_-_05_-_IMF.mp3")
Example #8
0
 def test_random_list(self):
     """Smoke test: random_list() completes without raising after a directory is added."""
     dirManager = DirManager()
     dirManager.add_directory("../test_files/")
     # The result is not inspected; the test fails only if the call raises.
     dirManager.random_list()
Example #9
0
 def test_available_dict(self):
     """list_available_nested_dict() yields something that dict() can consume."""
     dirManager = DirManager()
     # Named "nested" rather than "json" so the stdlib module name is not shadowed.
     # The dict() conversion is the real check: it raises if the result is
     # not a mapping or an iterable of key/value pairs.
     nested = dict(dirManager.list_available_nested_dict())
     self.assertIsInstance(nested, dict)
Example #10
0
 def test_available(self):
     """list_available() on a freshly constructed DirManager returns a dict."""
     dirManager = DirManager()
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(dirManager.list_available(), dict)
Example #11
0
 def test_get_directory(self):
     """get_directory() contains an added directory and echoes it back when asked by name."""
     dirManager = DirManager()
     dirManager.add_directory("./teste/")
     # assertIn / assertEqual print the offending values when they fail.
     self.assertIn("./teste/", dirManager.get_directory())
     self.assertEqual("./teste/", dirManager.get_directory("./teste/"))
Example #12
0
 def setUp(self):
     """Give each test a DirManager (``self.dm``) that already has ``test_dir`` added."""
     self.dm = manager = DirManager()
     manager.add_directory(test_dir)
Example #13
0
 def setUpClass(cls):
     """Point ``config.base_dir`` at a new temp directory, reload config, and build the shared DirManager."""
     # The manager is published through the module-level name ``dirmanager``.
     global dirmanager
     scratch_dir = tempfile.mkdtemp()
     config.base_dir = scratch_dir
     config.__reload__()
     dirmanager = DirManager()