Beispiel #1
0
 def __init__(self, in_file_directory, in_file_name,
              in_extension, in_delimiter, in_missing_value,
              in_quote,
              out_file_directory, out_file_name,
              out_extension, out_delimiter,
              out_missing_value,
              out_file_single_file, out_file_separate_line,
              raw_data_structure, clean_data_structure):
     """Create the file and data handlers used by this object.

     One ``FileHandler`` is built for the input file and one for the
     output file; two ``DataHandler`` objects receive the raw and the
     clean structure. When ``out_file_single_file`` is ``'on'`` and
     ``out_file_separate_line`` is ``'title'``, the input file name is
     passed to the output handler as the separator line instead.
     ``in_quote == 'yes'`` switches the ``unquote`` flag on.
     """
     # input side
     self.in_file_handler = FileHandler(in_file_directory, in_file_name,
                                        in_extension, in_delimiter)

     # output side: 'title' is a placeholder for the input file name
     if out_file_single_file == 'on' and out_file_separate_line == 'title':
         out_file_separate_line = in_file_name
     self.out_file_handler = FileHandler(out_file_directory, out_file_name,
                                         out_extension, out_delimiter,
                                         out_file_single_file,
                                         out_file_separate_line)

     # one data handler per processing stage
     self.raw_data_handler = DataHandler()
     self.raw_data_handler.structure = raw_data_structure
     self.clean_data_handler = DataHandler()
     self.clean_data_handler.structure = clean_data_structure

     # accumulated validation errors start out empty
     self.error_message = ''
     self.unquote = bool(in_quote == 'yes')
     self.in_missing_value = in_missing_value
     self.out_missing_value = out_missing_value
Beispiel #2
0
 def test_saveAnyDataTo(self):
     # FileHandler.saveAnyDataTo must open the target path in 'wb+' mode.
     # open() is replaced by a mock so nothing is written to disk, and
     # os.path.isdir is patched to report True (a real boolean, not a
     # callable) for whatever path is checked.
     m = mock_open()
     with patch("builtins.open", m, create=True):
         with patch('os.path.isdir', return_value=True):
             FileHandler.saveAnyDataTo("test data", 'abc/images.jpg')
             m.assert_called_with('abc/images.jpg', 'wb+')
Beispiel #3
0
 def saveTo(self, dir=os.path.join(ROOT_DIR, 'downloads')):
     """Save every image returned by ``self.getImages()`` into ``dir``.

     ``dir`` defaults to ``<ROOT_DIR>/downloads`` and is remembered on
     ``self.dir``. Entries whose image is ``None`` are skipped; for the
     rest, ``image.content`` is handed to ``FileHandler.saveAnyDataTo``
     together with the target path built from ``self.dir`` and the file
     name.
     """
     self.dir = dir
     available = (
         (picture, name)
         for picture, name in self.getImages()
         if picture is not None
     )
     for picture, name in available:
         target_path = os.path.join(self.dir, name)
         FileHandler.saveAnyDataTo(picture.content, target_path)
Beispiel #4
0
def main():
    """Run the upload pipeline for the item named by the first CLI argument.

    Builds the config, print, argument, file, network and OS handlers, then
    creates the temporary replica filesystem, uploads it and sends the
    notification. Ordinary failures are reported on stderr instead of being
    silently discarded; ``KeyboardInterrupt`` and ``SystemExit`` are no
    longer swallowed. ``cleanup()`` is attempted whenever the OS handler was
    successfully created, whatever happened afterwards.
    """
    inote = sys.argv[1]

    # Stays None when setup fails before the OS handler exists, so the
    # cleanup step can tell whether there is anything to clean up.
    o = None

    try:
        c = _get_config_handler()
        p = PrintHandler(c)
        a = ArgumentHandler(c, p, inote)
        f = FileHandler(c, a, p, inote)
        n = NetworkHandler(c, f, p)

        o = OSHandler(c, a, f, p)
        o.create_temp_replica_fs()
        o.upload()

        n.notify()

    except Exception as err:
        # Best-effort run: report the failure but do not crash the caller.
        sys.stderr.write("Error: {0}\n".format(err))

    finally:
        if o is not None:
            try:
                o.cleanup()
            except Exception as err:
                # Cleanup is best-effort as well; make the failure visible.
                sys.stderr.write("Cleanup failed: {0}\n".format(err))

    print()  # For interactive sessions
Beispiel #5
0
def initialize():
    """Fetch tweets with the keys from ``keys.json`` and pass them to a FileHandler.

    The JSON content of ``keys.json`` is given to ``TweetManager``; its
    ``get_all_tweets`` is called with ``src.teams.nfl`` and the list of
    field names, and the result is passed to ``FileHandler`` together with
    the same field names. Progress markers are printed before and after.
    """
    # read the API keys
    with open('keys.json') as key_file:
        credentials = json.load(key_file)

    print("Starting...")

    # set up the tweet manager with the loaded keys
    manager = TweetManager(credentials)

    field_names = ['id', 'name', 'created_at', 'text', 'media_type']

    tweets = manager.get_all_tweets(src.teams.nfl, field_names)
    # keep a reference so the handler lives until the function returns
    handler = FileHandler(tweets, field_names)

    print("Finished")
Beispiel #6
0
class FileHandlerTest(unittest.TestCase):
    """Unit tests for ``FileHandler`` with file access replaced by mocks."""

    def setUp(self) -> None:
        # Build the object under test from a path; open() is mocked so the
        # handler reads "test data" without touching the filesystem.
        self.path1 = 'xyz'
        with patch("builtins.open", mock_open(read_data="test data")):
            self.sut = FileHandler(self.path1)

    def test_funnelFileFormat_path(self):
        # Constructing from a path opens it in 'r' mode and stores the content.
        with patch("builtins.open",
                   mock_open(read_data="test data")) as mock_file:
            sut = FileHandler(self.path1)
            assert sut.data == "test data"
            mock_file.assert_called_with(self.path1, 'r')

    def test_funnelFileFormat_file(self):
        # Constructing from a file-like object stores its content.
        file1 = StringIO("test data")
        sut = FileHandler(file1)
        assert sut.data == "test data"

    def test_readFile_valid(self):
        # readFile opens the path in 'r' mode and returns what was read.
        with patch("builtins.open",
                   mock_open(read_data="test data")) as mock_file:
            data = self.sut.readFile(self.path1)
            assert data == "test data"
            mock_file.assert_called_with(self.path1, 'r')

    def test_readFile_invalid(self):
        # NOTE(review): open() is not mocked here, so this presumably relies
        # on no file named "abc" existing in the working directory - confirm.
        data = self.sut.readFile("abc")
        assert data is None

    def test_toString(self):
        # toString returns the data loaded in setUp.
        assert self.sut.toString() == "test data"

    def test_saveAnyDataTo(self):
        # The static saver must open the target path in 'wb+' mode.
        # os.path.isdir is patched to return True (a real boolean rather
        # than a callable) for any path.
        m = mock_open()
        with patch("builtins.open", m, create=True):
            with patch('os.path.isdir', return_value=True):
                FileHandler.saveAnyDataTo("test data", 'abc/images.jpg')
                m.assert_called_with('abc/images.jpg', 'wb+')

    def test_saveDataTo(self):
        # The instance saver must open the target path in 'wb+' mode.
        m = mock_open()
        with patch("builtins.open", m, create=True):
            with patch('os.path.isdir', return_value=True):
                self.sut.saveDataTo('abc/images.jpg')
                m.assert_called_with('abc/images.jpg', 'wb+')

    def test_getFileAsList(self):
        # The stored text is split into one list entry per line.
        self.sut.data = 'abc\ndef'
        assert self.sut.getFileAsList() == ['abc', 'def']
Beispiel #7
0
        def map_data(self, file_directory, file_name,
                     file_extension, file_delimiter,
                     replace_ids,
                     kept_id_position, lost_id_position, target_positions,
                     drop_unreferenced_entries, target_unreferenced_entries,
                     drop_ghosts,
                     remove_duplicates, target_duplicates_set,
                     merge_entries, target_merge_set, commands):
            """Run a ``Mapper`` over the clean data using a mapping file.

            The mapping file is imported through a ``FileHandler`` and copied
            into a ``DataHandler``. Field-name lists are translated to index
            lists via ``get_indeces_from_clean_structure`` unless the
            controlling switch is ``'off'`` (then an empty list is used), and
            ``commands`` is translated via ``get_commands``.

            Returns a two-item list: a list copy of ``mapper.mapped_data`` and
            ``mapper.str_counter_ids``.
            """
            def indices_unless_off(switch, names):
                # an 'off' switch means "no positions"
                if switch == 'off':
                    return []
                return self.get_indeces_from_clean_structure(names)

            # 1 - load the mapping file
            # TODO: validate the file handler (check_valid) before importing
            mapping_file = FileHandler(file_directory, file_name,
                                       file_extension, file_delimiter)
            mapping_file.import_data(False)

            # 2 - copy its rows into a data handler
            mapping = DataHandler()
            mapping.data = list(mapping_file.data)

            # 3 - translate field names into positions
            # (target_positions acts as both the switch and the name list)
            target_idx = indices_unless_off(target_positions, target_positions)
            unreferenced_idx = indices_unless_off(drop_unreferenced_entries,
                                                  target_unreferenced_entries)
            duplicates_idx = indices_unless_off(remove_duplicates,
                                                target_duplicates_set)
            merge_idx = indices_unless_off(merge_entries, target_merge_set)

            # 4 - translate the commands
            commands = self.get_commands(commands, merge_idx)

            # 5 - hand everything to the mapper
            clean_rows = list(self.clean_data_handler.data_final)
            mapper = Mapper(clean_rows,
                            mapping.data,
                            replace_ids,
                            kept_id_position, lost_id_position,
                            target_idx,
                            drop_unreferenced_entries, unreferenced_idx,
                            drop_ghosts,
                            remove_duplicates, duplicates_idx,
                            merge_entries, merge_idx,
                            commands)

            return [list(mapper.mapped_data), mapper.str_counter_ids]
Beispiel #8
0
class Analyzer:
    """Coordinate importing, structuring, cleaning, saving and mapping of data.

    The class owns an input and an output ``FileHandler`` plus a raw and a
    clean ``DataHandler`` and moves the data from one to the next.
    """

    def __init__(self, in_file_directory, in_file_name,
                 in_extension, in_delimiter, in_missing_value,
                 in_quote,
                 out_file_directory, out_file_name,
                 out_extension, out_delimiter,
                 out_missing_value,
                 out_file_single_file, out_file_separate_line,
                 raw_data_structure, clean_data_structure):
        """Create the file handlers and data handlers.

        When ``out_file_single_file`` is ``'on'`` and
        ``out_file_separate_line`` is ``'title'``, the input file name is
        used as the separator line passed to the output handler.
        ``in_quote == 'yes'`` enables unquoting of the imported data.
        """
        # file handlers for the in and out files
        self.in_file_handler = FileHandler(in_file_directory, in_file_name,
                                           in_extension, in_delimiter)
        if out_file_single_file == 'on' and out_file_separate_line == 'title':
            out_file_separate_line = in_file_name
        self.out_file_handler = FileHandler(out_file_directory, out_file_name,
                                            out_extension, out_delimiter,
                                            out_file_single_file,
                                            out_file_separate_line)

        # data handlers for raw and clean data
        self.raw_data_handler = DataHandler()
        self.raw_data_handler.structure = raw_data_structure
        self.clean_data_handler = DataHandler()
        self.clean_data_handler.structure = clean_data_structure

        # accumulated validation error text
        self.error_message = ''
        self.unquote = in_quote == 'yes'
        self.in_missing_value = in_missing_value
        self.out_missing_value = out_missing_value

    def check_valid(self):
        """Validate all four handlers.

        Every handler is checked (no short-circuit); the ``error_message``
        of each failing handler is appended to ``self.error_message``.

        Returns:
            True when every handler reports valid, False otherwise.
        """
        valid = True
        handlers = (self.in_file_handler, self.out_file_handler,
                    self.raw_data_handler, self.clean_data_handler)
        for handler in handlers:
            if not handler.check_valid():
                self.error_message = self.error_message + handler.error_message
                valid = False
        return valid

    def structure_raw_data(self):
        """Import the input file and structure it in the raw data handler."""
        # 1- import data from file (per the original note: True would show
        #    only the first 1000 rows, False loads the whole dataset)
        self.in_file_handler.import_data(False, self.in_missing_value)
        # Drop falsy rows. A real list is built because filter() returns a
        # one-shot iterator on Python 3.
        self.in_file_handler.data = [row for row in self.in_file_handler.data
                                     if row]
        if self.unquote:
            self.in_file_handler.unquote_data()
        # 2- transfer data to data handler
        self.raw_data_handler.data = list(self.in_file_handler.data)
        # 3- structure the data
        self.raw_data_handler.structure_data()

    def generate_clean_data(self):
        """Copy the structured raw data to the clean handler and clean it."""
        # 1- transfer data to clean data handler
        self.clean_data_handler.data = list(self.raw_data_handler.data_final)
        # 2- clean the data given the raw data structure
        self.clean_data_handler.clean_data(self.raw_data_handler.structure)

    def save_clean_data(self):
        """Write the clean data through the output file handler."""
        # 1- transfer clean data to file handler
        self.out_file_handler.data = list(self.clean_data_handler.data_final)
        # 2- save the data
        self.out_file_handler.write_data_file(self.out_missing_value)

    def map_data(self, file_directory, file_name,
                 file_extension, file_delimiter,
                 replace_ids,
                 kept_id_position, lost_id_position, target_positions,
                 drop_unreferenced_entries, target_unreferenced_entries,
                 drop_ghosts,
                 remove_duplicates, target_duplicates_set,
                 merge_entries, target_merge_set, commands):
        """Run a ``Mapper`` over the clean data using a mapping file.

        Field-name lists are converted to index lists with
        ``get_indeces_from_clean_structure`` unless their controlling switch
        is ``'off'`` (then an empty list is used); ``commands`` is converted
        with ``get_commands``.

        Returns:
            A two-item list: a list copy of ``mapper.mapped_data`` and
            ``mapper.str_counter_ids``.
        """
        # 1 - initialize file handler
        # TODO: validate the file handler (check_valid) before importing
        mapper_file_handler = FileHandler(file_directory, file_name,
                                          file_extension, file_delimiter)

        # 2 - import merge info from file
        mapper_file_handler.import_data(False)

        # 3 - initialize data handler and transfer the data
        mapper_data_handler = DataHandler()
        mapper_data_handler.data = list(mapper_file_handler.data)

        # 4 - prepare positions from names
        target_position_index = []
        if target_positions != 'off':
            target_position_index = self.get_indeces_from_clean_structure(
                target_positions)

        target_unreferenced_entries_index = []
        if drop_unreferenced_entries != 'off':
            target_unreferenced_entries_index = (
                self.get_indeces_from_clean_structure(
                    target_unreferenced_entries))

        target_duplicates_set_index = []
        if remove_duplicates != 'off':
            target_duplicates_set_index = (
                self.get_indeces_from_clean_structure(target_duplicates_set))

        target_merge_set_index = []
        if merge_entries != 'off':
            target_merge_set_index = self.get_indeces_from_clean_structure(
                target_merge_set)

        # 5 - get the translated commands
        commands = self.get_commands(commands, target_merge_set_index)

        # 6 - initialize a mapper with all info
        mapper = Mapper(list(self.clean_data_handler.data_final),
                        mapper_data_handler.data,
                        replace_ids,
                        kept_id_position, lost_id_position,
                        target_position_index,
                        drop_unreferenced_entries,
                        target_unreferenced_entries_index,
                        drop_ghosts,
                        remove_duplicates, target_duplicates_set_index,
                        merge_entries, target_merge_set_index,
                        commands)

        return [list(mapper.mapped_data), mapper.str_counter_ids]

    def get_indeces_from_clean_structure(self, positions):
        """Translate a comma-separated list of field names into indices.

        The index of a field is its first position in the clean structure,
        integer-divided by two (presumably the structure holds two entries
        per field - TODO confirm). Integer division keeps the indices ints
        on Python 3 as well as Python 2.

        Args:
            positions: ``'all'``, ``'All'``, ``' '`` or ``''`` to select
                every field, otherwise names separated by commas (a single
                space on either side of a comma is tolerated).

        Returns:
            A list of integer indices.

        Raises:
            SystemExit: when a name is not in the clean structure.
        """
        structure = self.clean_data_handler.structure
        admitted_all_expression = ['all', 'All', ' ', '']
        if positions in admitted_all_expression:
            # take all positions
            return list(range(len(structure) // 2))

        # remove spaces before or after the ','
        list_positions = positions.replace(', ', ',').replace(' ,', ',').split(',')

        indeces = []
        for position in list_positions:
            if position not in structure:
                sys.exit("ERROR: variable {0} not found in the clean data structure".format(position))
            first_slot = next(i for i, field in enumerate(structure)
                              if field == position)
            indeces.append(first_slot // 2)
        return indeces

    def get_commands(self, commands, other_fields):
        """Translate a ``name:command`` list into a per-index command dict.

        Args:
            commands: comma-separated ``name:command`` pairs; admitted
                commands are ``'+'``, ``'same'`` and ``'avg'``.
            other_fields: indices used for merge recognition; they may not
                also carry a command.

        Returns:
            A dict mapping ``str(index)`` to the command. Every index that
            has no explicit command and is not in ``other_fields`` gets
            ``'same'``.

        Raises:
            SystemExit: for an unknown field name, a command that is not
                admitted, or a field that is both in ``other_fields`` and in
                the command list.
        """
        structure = self.clean_data_handler.structure
        admitted_command_expression = ['+', 'same', 'avg']
        final_commands = dict()
        # remembers which field name produced each index, so a conflict can
        # be reported with the right name
        names_by_index = dict()

        list_commands = commands.replace(', ', ',').replace(' ,', ',').split(',')

        for command in list_commands:
            command = command.replace(' :', ':').replace(': ', ':').split(':')
            name = command[0]
            if name not in structure:
                sys.exit("ERROR: In the Command list:\n variable '{0}' not found in the clean data structure".format(name))
            first_slot = next(i for i, field in enumerate(structure)
                              if field == name)
            # integer division: the key must read '1', not '1.0'
            index = first_slot // 2
            if command[1] not in admitted_command_expression:
                sys.exit("ERROR: Command '{0}' not admitted".format(command[1]))
            final_commands[str(index)] = command[1]
            names_by_index[index] = name

        # fields with a command must not also be merge-recognition fields;
        # all remaining non-merge fields default to 'same'
        for i in range(len(structure) // 2):
            if str(i) not in final_commands:
                if i not in other_fields:
                    final_commands[str(i)] = 'same'
            elif i in other_fields:
                sys.exit("ERROR: In the Command list:\n variable '{0}' given for the merging set and the command set".format(names_by_index[i]))

        return final_commands

    def get_clean_data(self):
        """Return ``data_final`` of the clean data handler."""
        consist_data = self.clean_data_handler.data_final
        return consist_data

    def get_merged_data(self):
        """Return ``self.Merger.data_merged``."""
        # NOTE(review): no method of this class assigns ``self.Merger``;
        # unless a caller sets it, this raises AttributeError - confirm.
        return self.Merger.data_merged
Beispiel #9
0
 def fromFile(self, path):
     """Load ``self.urls`` from the file at ``path`` and return ``self``.

     The file is read through ``FileHandler.getFileAsList``; the resulting
     list is logged at INFO level, one URL per line. Returning ``self``
     allows method chaining.
     """
     reader = FileHandler(path)
     self.urls = reader.getFileAsList()
     url_listing = '\n'.join(self.urls)
     logging.info('list of urls - \n{}'.format(url_listing))
     return self
Beispiel #10
0
def main():
    """Prepare the folders, then process every book yielded for the input path.

    A ``FileHandler`` creates its folder and moves the file; each entry
    yielded by ``fb2_books(path_input)`` is wrapped in a ``Model`` and passed
    to ``for_main`` on a fresh ``Connection``.
    """
    handler = FileHandler()
    handler.create_folder()
    handler.move_file()
    for book in handler.fb2_books(handler.path_input):
        # the connection is created before the model, one per book
        connection = Connection()
        connection.for_main(Model(handler.path_input, book))
Beispiel #11
0
 def setUp(self) -> None:
     # Build the FileHandler under test from the path 'xyz' while open()
     # is replaced by a mock that yields "test data".
     self.path1 = 'xyz'
     fake_open = mock_open(read_data="test data")
     with patch("builtins.open", fake_open):
         self.sut = FileHandler(self.path1)
Beispiel #12
0
 def test_funnelFileFormat_file(self):
     # A FileHandler built from a file-like object exposes its content
     # on the ``data`` attribute.
     in_memory_file = StringIO("test data")
     handler = FileHandler(in_memory_file)
     assert handler.data == "test data"
Beispiel #13
0
 def test_funnelFileFormat_path(self):
     # A FileHandler built from a path opens that path in 'r' mode and
     # exposes what it read on the ``data`` attribute.
     fake_open = mock_open(read_data="test data")
     with patch("builtins.open", fake_open) as opened:
         handler = FileHandler(self.path1)
         assert handler.data == "test data"
         opened.assert_called_with(self.path1, 'r')
Beispiel #14
0
def cmd_interface(
    source: Optional[str] = typer.Option(
        None,
        "--source",
        show_default=False,
        case_sensitive=__CASE_SENSITIVE,
        help="Folder ID for source directory on Google Drive",
    ),
    destination: Optional[Path] = typer.Option(
        None,
        "--dest",
        "--destination",
        exists=True,  # path needs to exist
        writable=True,  # ensures a writeable path
        dir_okay=True,  # allows path to directory
        file_okay=False,  # rejects path to a file
        resolve_path=True,  # resolves complete path
        case_sensitive=__CASE_SENSITIVE,
        help="Destination directory where `strm` files will be placed",
    ),
    root_name: Optional[str] = typer.Option(
        None,
        "--root",
        "--rootname",
        case_sensitive=__CASE_SENSITIVE,
        help="Custom name for the source directory",
    ),
    rem_extensions: bool = typer.Option(
        False,
        "--no-ext",
        "--no-extensions",
        show_default=False,
        case_sensitive=__CASE_SENSITIVE,
        help="Remove original extensions from generated strm files",
    ),
    hide_updates: bool = typer.Option(
        False,
        "--no-updates",
        show_default=False,
        case_sensitive=__CASE_SENSITIVE,
        help="Disable live progress/updates",
    ),
    force: bool = typer.Option(
        False,
        "--force",
        "-f",
        show_default=True,
        case_sensitive=__CASE_SENSITIVE,
        help="Wipe out root directory (if exists) in case of a collision",
    ),
    version: bool = typer.Option(
        None,
        "--version",
        "-v",
        is_eager=True,
        callback=__callback_version,
        case_sensitive=__CASE_SENSITIVE,
        help="Display current app version",
    ),
) -> None:
    # Command entry point: walk a Google Drive source and generate `strm`
    # files below the destination directory.
    #   1. create the drive handler and open the live output stream
    #   2. fall back to the current directory / an interactively chosen
    #      teamdrive when destination / source are not supplied
    #   3. check for a name collision at the root directory (honours --force)
    #   4. walk the drive, handing directory switches and file generation
    #      to the file handler
    # `version` is not read here; it is wired to its callback in the
    # option declaration above.
    drive_handler = DriveHandler()  # authenticate drive api

    with output(output_type="list", initial_len=9, interval=500) as outstream:
        # No destination given: use the current working directory
        destination = destination or os.getcwd()
        file_handler = FileHandler(
            destination=destination,
            include_extensions=not rem_extensions,
            live_updates=not hide_updates,
            outstream=outstream,
        )

        # No source given: let the user pick a teamdrive
        if not source:
            source = drive_handler.select_teamdrive()

        show_updates = not hide_updates
        if show_updates:
            banner = f"Walking  through `{drive_handler.drive_name(source)}`\n"
            typer.secho(banner, fg=typer.colors.GREEN, err=True)

        # The root directory is named after --root when given, otherwise
        # after the drive itself
        root_dir_name = root_name or drive_handler.drive_name(source)
        __check_collisions(
            force=force,
            dst=join_path(destination, root_dir_name),
        )

        drive_handler.walk(
            source=source,
            change_dir=file_handler.switch_dir,
            generator=file_handler.strm_generator,
            orig_path=destination,
            custom_root=root_name,
        )

    summary = f"Completed generating strm files\nFiles generated in: {destination}"
    typer.secho(summary, fg=typer.colors.GREEN)