Exemple #1
0
    def test_file_exists(self):
        """Ensure an error is raised when a file exists.

        The file is first created through ``atomic_write``; a second
        ``atomic_write`` to the same path must raise ``FileExistsError``.
        """
        with TemporaryDirectory() as temp:
            fp = os.path.join(temp, "qwert.txt")

            # First write creates the destination file.
            with atomic_write(fp, "w") as f:
                f.write("qwert")

            self.assertTrue(os.path.exists(fp))

            # assertRaises fails the test when nothing is raised; a plain
            # try/except would let a missing exception pass unnoticed.
            with self.assertRaises(FileExistsError):
                with atomic_write(fp, "w") as f:
                    f.write("1234")
Exemple #2
0
def save_followers(file: str, followers: list):  # pragma: no cover
    """Saving followers list locally and counting new followers

    When ``file`` already exists, only the usernames that are not yet listed
    in it are appended. Otherwise the file is created through
    ``atomic_write`` and every username is written.

    :param file: str filepath of the txt file
    :param followers: usernames list from get_followers
    :return: return the number of new followers
    :rtype: int
    """
    if not followers:
        print("No followers found.")
        return 0

    new_followers = 0

    if os.path.exists(file):
        print("Updating followers.txt ...")
        with open(file, "a+") as f:
            # "a+" opens with the position at end of file, so rewind before
            # reading; otherwise the existing usernames are never seen.
            f.seek(0)
            content = f.read()
            # Read the file once and use a set for O(1) membership tests.
            known = set(content.split())

            # Keep one username per line even if the last line was left
            # without a trailing newline.
            if content and not content.endswith("\n"):
                f.write("\n")

            for username in followers:
                if username not in known:
                    # Writes in append mode always land at end of file.
                    f.write("%s\n" % username)
                    # Remember it so a duplicate in `followers` is not
                    # written or counted twice.
                    known.add(username)
                    new_followers += 1

    else:
        with atomic_write(file, mode="w", as_file=True) as f:
            print("Creating followers.txt ...")
            for username in followers:
                f.write("%s\n" % username)
                new_followers += 1

    return new_followers
    def test_str_returned(self):
        """
        Check the type of the object yielded by atomic_write:
        1. with as_file=False the yielded object's type is str;
        2. with as_file=True the yielded object's type is not str.
        """
        # Each case pairs the as_file flag with the assertion to apply.
        cases = (
            (False, self.assertEqual),
            (True, self.assertNotEqual),
        )

        for as_file_flag, check in cases:
            # A fresh temporary directory per case, as the target must be new.
            with TemporaryDirectory() as workdir:
                target = os.path.join(workdir, "asdf.txt")

                with atomic_write(target, as_file=as_file_flag) as yielded:
                    check(type(yielded), str)
Exemple #4
0
def excel_2_parquet(data):
    """Convert an Excel file to a parquet file stored next to it.

    :param data: path of the Excel file to read
    :return: path of the parquet file (same name, ``.parquet`` extension)
    """
    frame = pd.read_excel(data)

    # Swap the original extension for ".parquet".
    stem = os.path.splitext(data)[0]
    parquet_path = stem + ".parquet"

    # as_file=False: the context manager hands over a target for to_parquet.
    with atomic_write(parquet_path, mode="w", as_file=False) as target:
        frame.to_parquet(target)

    return parquet_path
Exemple #5
0
def get_img():
    """Download the post image of the current page and return its local path.

    Every ``<img>`` element found through the module-level ``driver`` is
    inspected. The first one whose ``sizes`` attribute is a plain pixel width
    larger than 293px is treated as the post image and saved under
    ``data/images`` unless a file with that name is already there.

    :return: absolute path of the image, or ``None`` when no image qualifies
    :raises requests.HTTPError: when the image URL answers with a 4XX/5XX code
    """
    import re  # local import: only used to parse the ``sizes`` attribute

    # 293px is the size of the post preview;
    # any picture bigger than that is a post picture
    preview_width_px = 293

    # Find all elements containing img
    page_images = driver.find_elements_by_xpath("//img")

    # Check all images and getting their attribute sizes
    for img in page_images:
        sizes = img.get_attribute("sizes")

        # Compare the width numerically: comparing the raw strings is
        # lexicographic ("1080px" < "293px") and fails on a missing attribute.
        # Values that are not of the form "<number>px" are skipped.
        width = re.fullmatch(r"\s*(\d+(?:\.\d+)?)px\s*", sizes or "")
        if width is None or float(width.group(1)) <= preview_width_px:
            continue

        # So this will give only one url
        url = img.get_attribute("src")

        # Parsing the URL in order to get the filename path
        get_filename = urlparse(url).path

        # Image filename
        img_name = os.path.basename(get_filename)

        # Create folders to store the image
        os.makedirs("data/images", exist_ok=True)

        # Path where the image will be saved
        path = os.path.join(os.path.abspath("data"), "images/", img_name)

        # Making sure the file doesn't exist already
        if not os.path.exists(path):

            # Using package requests to check for any http issue like 4XX or 5XX errors
            headers = {}

            with requests.get(url, stream=True, headers=headers) as req:
                # raise_for_status() returns None on success and raises on
                # failure, so report the problem from the except branch and
                # let the exception propagate as before.
                try:
                    req.raise_for_status()
                except requests.HTTPError:  # pragma: no cover
                    print("Error: URL of picture is incorrect...")
                    raise

                # Writing file atomically locally
                with atomic_write(path, as_file=False) as f:
                    print("Saving image...")
                    urlretrieve(url, filename=f)
                    print("Image saved :)")

        else:
            # Nothing to download if the picture is already in the folder
            print("Picture already exists :(")

        # Returning path of the picture
        return path

    # No image on the page matched the post-image criteria
    return None
Exemple #6
0
    def test_file_exists(self):
        """atomic_write must raise FileExistsError when the target already exists."""

        with TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "asdf.txt")

            # Occupy the destination path with a regular file first.
            with open(target, "w") as existing:
                existing.write("test content")

            # Both context managers are entered in order, assertRaises first.
            with self.assertRaises(FileExistsError), atomic_write(target, "w") as handle:
                handle.write("this shouldn't work")
Exemple #7
0
    def test_atomic_failure(self):
        """A failure raised inside atomic_write must leave no file behind."""

        with TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "abcd.txt")

            with self.assertRaises(FakeFileFailure), atomic_write(target, "w") as handle:
                # Remember the path of the file being written to.
                staging = handle.name
                assert os.path.exists(staging)
                raise FakeFileFailure()

            # Neither the in-progress file nor the destination may remain.
            for leftover in (staging, target):
                assert not os.path.exists(leftover)
    def print_reports(self):
        """Plot the confusion matrix and print/save the classification report.

        Reads ``self.y_test``, ``self.y_pred``, ``self.names``,
        ``self.model_name``, ``self.result_path`` and ``self.test_flag``.
        Saves a confusion-matrix heatmap as a PNG, prints the classification
        report, writes the same report as CSV and finally calls
        ``self.plot_clasf_map()``.
        """
        # Confusion matrix wrapped in a DataFrame labelled with the unique names
        data = confusion_matrix(self.y_test, self.y_pred)
        df_cm = pd.DataFrame(data,
                             columns=np.unique(self.names),
                             index=np.unique(self.names))
        df_cm.index.name = 'Actual'
        df_cm.columns.name = 'Predicted'
        # The figure is created before the heatmap call so the heatmap is
        # drawn into it
        plt.figure(figsize=(10, 8))
        title1 = 'Confusion matrix for ' + self.model_name
        sns.set(font_scale=1.4)  # for label size
        # suffix file with the timestamp (12-hour clock followed by AM/PM)
        timestr = datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p")
        suffix = '_confusion_map_' + timestr + '.png'
        # result_path is concatenated as-is, without adding a path separator
        fig_name = f'{self.result_path}{self.model_name}{suffix}'
        sns.heatmap(df_cm,
                    cmap="viridis",
                    annot=True,
                    annot_kws={
                        "size": 16
                    },
                    fmt='d').set_title(title1)
        # save the classification heatmap
        plt.savefig(fig_name, dpi=300)
        # Text version of the report goes to stdout
        print(
            classification_report(self.y_test,
                                  self.y_pred,
                                  target_names=self.names))
        # Same report again, this time as a dict so it can become a DataFrame
        report = classification_report(self.y_test,
                                       self.y_pred,
                                       target_names=self.names,
                                       output_dict=True)
        # frame the dataframe object for the classification report
        df = pd.DataFrame(report).transpose()
        report_file = self.model_name + '_clf_report' + timestr + '.csv'
        filename = f'{self.result_path}{report_file}'

        # get the atomic writer setup
        if os.path.exists(filename) == False:
            # atomically write csv file by getting the path where to write it
            # NOTE(review): the third positional argument is presumably
            # as_file=False, and the yielded value is used below as a
            # directory prefix - confirm against atomic_write's definition.
            with atomic_write(filename, "w", False) as f:
                dir_path = f
            # The CSV itself is written by to_csv to a path built from
            # dir_path, after the atomic_write context has exited.
            if self.test_flag == "No":
                new_file = f'{dir_path}/results/{report_file}'
            else:
                new_file = f'{dir_path}/test/results/{report_file}'
            df.to_csv(new_file)
        self.plot_clasf_map()
Exemple #9
0
    def test_atomic_write(self):
        """After a successful atomic_write only the target file remains, with the written text."""

        with TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "abcd.txt")

            with atomic_write(target, "w") as handle:
                # The destination must not show up while writing is in progress.
                assert not os.path.exists(target)
                staging = handle.name
                handle.write("1234")

            # The file written to is gone and the destination now exists.
            assert not os.path.exists(staging)
            assert os.path.exists(target)

            with open(target) as written:
                content = written.read()
            self.assertEqual(content, "1234")
    def test_file_exists(self):
        """atomic_write refuses to write over a file that is already present."""

        # Work inside a throw-away directory
        with TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "asdf.txt")

            # Put a file at the target path before the atomic write is attempted.
            with open(target, "w") as placeholder:
                placeholder.write("I Exist")

            # The atomic write to the same path is expected to raise
            # FileExistsError; assertRaises fails the test otherwise.
            with self.assertRaises(FileExistsError), atomic_write(target) as handle:
                handle.write("This file should not be written")
    def test_atomic_failure(self):
        """No file may survive when the body of atomic_write raises."""

        # Work inside a throw-away directory
        with TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "asdf.txt")

            # Simulate a failure: the file being written exists, but
            # FakeFileFailure is raised before anything is written to it.
            with self.assertRaises(FakeFileFailure), atomic_write(target, "w") as handle:
                staging = handle.name
                assert os.path.exists(staging)
                raise FakeFileFailure()

            # After the failure both the in-progress path and the final path
            # must be absent.
            still_there = os.path.exists
            assert not still_there(staging)
            assert not still_there(target)
    def test_other_kwargs(self):
        """Test that the atomic_write method can accept other keyword arguments.

        A character is written with ``encoding='UTF-8'`` passed through
        ``atomic_write`` and then read back with the same encoding.
        """

        # Create a new temporary file in a temporary directory
        with TemporaryDirectory() as tmp:
            fp = os.path.join(tmp, "asdf.txt")

            # Write a character using UTF-8 encoding
            with atomic_write(fp, encoding='UTF-8') as file:
                first_ln = chr(57344)
                file.write(first_ln)

            # Read back with an explicit UTF-8 encoding: relying on the
            # platform default would make the result locale-dependent.
            with open(fp, 'r', encoding='UTF-8') as opened_file:
                line_file = opened_file.read()
                self.assertEqual(line_file, '\ue000')
Exemple #13
0
def get_vid():
    """Save the poster frame of the post's video and return its local path.

    The first ``<video>`` element found through the module-level ``driver``
    is used. Its ``poster`` attribute (a still image) is downloaded into
    ``data/videos`` unless a file with that name is already there.

    :return: relative path of the saved file under ``data/videos``
    :raises requests.HTTPError: when the poster URL answers with a 4XX/5XX code
    """
    # Find the first element containing video
    # The first element will always be the post's video
    page_video = driver.find_element_by_xpath("//video")

    # Getting the URL of the video
    # url = page_video.get_attribute("src")
    # On December 8th Instagram changed something here which affected src that we were getting
    # Due to this and for now we will only save the first frame of the video as jpg - speed up bot
    url = page_video.get_attribute("poster")

    # Parsing the URL in order to get the filename path
    get_filename = urlparse(url).path

    # Video filename
    filename = os.path.basename(get_filename)

    # Create folders to store the video
    os.makedirs("data/videos", exist_ok=True)

    # Path where the video will be saved
    path = os.path.join("data/videos/", filename)

    # Nothing to download if the video is already present in the folder
    if os.path.exists(path):
        print("Video already exists :(")
        return path

    # Using package requests to check for any http issue like 4XX or 5XX errors
    with requests.get(url, stream=True) as req:
        # raise_for_status() returns None on success and raises on failure,
        # so report the problem from the except branch and let the exception
        # propagate as before.
        try:
            req.raise_for_status()
        except requests.HTTPError:  # pragma: no cover
            print("Error: URL of video is incorrect :(")
            raise

        # Writing file atomically locally
        with atomic_write(path, as_file=False) as f:
            urlretrieve(url, filename=f)
            print("Video saved :)")

    # Returning the video path in order to use with other functions
    return path
    def test_atomic_write(self):
        """The target file exists, with the expected text, once atomic_write completes."""

        # Everything happens inside a throw-away directory
        with TemporaryDirectory() as workdir:
            # Path of the (not yet existing) file to write
            target = os.path.join(workdir, "asdf.txt")
            exists = os.path.exists

            # atomic_write is used as a context manager yielding a file object
            with atomic_write(target, "w") as handle:
                assert not exists(target)
                staging = handle.name
                handle.write("asdf")

            # Once the context manager has exited, the path that was written
            # to must be gone...
            assert not exists(staging)

            # ...and the real target must be present.
            assert exists(target)

            # The content of the target must match what was written.
            with open(target) as written:
                self.assertEqual(written.read(), "asdf")
Exemple #15
0
def dash_data(file: str, save_to: str):
    """Load the dashboard Excel sheets and store the relevant columns as parquet.

    :param file: path of the dashboard Excel file (sheets "main" and "comments")
    :param save_to: path of the parquet file to (re)create
    """
    # An old parquet file is removed first - to be improved in future versions
    if os.path.exists(save_to):
        delete_file(save_to)

    # Columns kept from the merged sheets
    wanted_columns = ["object", "hashtags", "accounts", "comments"]

    # The parquet copy is what later code uses to retrieve the user's data
    with atomic_write(save_to, mode="w", as_file=False) as target:
        print("Loading user data...")

        # Read both sheets and stack them into a single DataFrame
        sheets = pd.read_excel(file, sheet_name=["main", "comments"])
        merged = pd.concat(sheets, ignore_index=True)

        # Keep the wanted columns, then drop rows made only of NaNs
        trimmed = merged[wanted_columns].dropna(axis=0, how="all")

        # Convert to a parquet file format
        trimmed.to_parquet(target)
    def normalize_save_data(self):
        """Normalize the loaded data and save it as parquet and CSV files.

        Loads the data and ground truth from ``self.mat`` or ``self.nmpy``
        depending on ``self.suffix``, flattens the first two axes, normalizes
        the result, appends the ground truth as a ``classes`` column and
        writes the table to ``image_norm.parquet`` and ``image_norm.csv``
        when those files do not already exist under ``self.result_path``.

        Raises:
            AttributeError: if ``self.suffix`` is neither "mat" nor "npy".
        """
        #print("===============================================\n")
        if self.suffix == "mat":
            arrIP, gt = self.mat.get_data()  # get data from mat type file
        elif self.suffix == "npy":
            arrIP, gt = self.nmpy.get_data()  # get data from npy type file
        else:
            print("Unsupported file type")
            raise AttributeError()
        #print("===================+++++++++================\n")

        # reshaping the data for the classification:
        # the first two axes are merged, the third axis is kept as columns
        X = np.reshape(arrIP,
                       (arrIP.shape[0] * arrIP.shape[1], arrIP.shape[2]))

        # Normalisation of data
        normalized_X = preprocessing.normalize(X)

        # converting numpy array to dataframe
        # please note that I am using normalized data to create the Data Frame which will be used
        # for further processing.
        df = pd.DataFrame(data=normalized_X)
        # ground truth flattened to one value per row of X and appended as the last column
        df_class = pd.DataFrame(data=gt.ravel())
        df = pd.concat([df, df_class], axis=1)

        #Override the default initialized values of r1 and r2 using dict of object
        self.lis.__dict__['r1'] = 1
        self.lis.__dict__['r2'] = arrIP.shape[
            2]  # get the number of spectral bands
        # NOTE(review): createfilelist() presumably yields one label per band
        # based on r1/r2 - confirm against its definition.
        list3 = self.lis.createfilelist()
        # set the names of the columns in the dataset
        df.columns = [f'band{i}' for i in list3] + ['classes']
        # filename=f'{self.result_path}image_norm.parquet' # worked with cli
        # df.to_parquet(filename,engine='fastparquet',compression=None) #
        # filename2=f'{self.result_path}image_norm.csv'
        # df.to_csv(filename2)

        filename = f'{self.result_path}image_norm.parquet'  # worked with cli
        if os.path.exists(filename) == False:
            # atomically write parquet file by getting the path where to write it
            # NOTE(review): the third positional argument is presumably
            # as_file=False, and the yielded value is used below as a
            # directory prefix - confirm against atomic_write's definition.
            with atomic_write(filename, "w", False) as f:
                dir_path = f
            # The parquet file itself is written by to_parquet after the
            # atomic_write context has exited.
            if self.test_flag == "No":
                new_file = '{0}/results/image_norm.parquet'.format(
                    str(dir_path))  # worked with cli
            else:
                new_file = '{0}/test/results/image_norm.parquet'.format(
                    str(dir_path))  # worked with pytest
            df.to_parquet(new_file, engine='fastparquet', compression=None)

        filename2 = f'{self.result_path}image_norm.csv'
        if os.path.exists(filename2) == False:
            # atomically write csv file by getting the path where to write it
            # (same pattern as for the parquet file above)
            with atomic_write(filename2, "w", False) as f:
                dir_path = f
            if self.test_flag == "No":
                new_file2 = '{0}/results/image_norm.csv'.format(str(dir_path))
            else:
                new_file2 = '{0}/test/results/image_norm.csv'.format(
                    str(dir_path))
            df.to_csv(new_file2)