Example #1
    def create_dataframe(self,
                         data=None,
                         df_format="json",
                         csv_dates=None,
                         index_col=None):
        """
        Create Pandas dataframe from external source

        Parameters
        ----------

        data : pandas.DataFrame, list, dict or str
           pandas.DataFrame : will be returned as is
           dict : converted to a single-row dataframe
           str : filename or external location to load the dataframe from
           list : list of feature values, converted to a single-row numpy array
        df_format : str
           format used when data is a filename: "json" or "csv"
        csv_dates :
           forwarded to the file parser when data is a filename
        index_col :
           forwarded to the file parser when data is a filename
        """
        if data is not None:
            if isinstance(data, pd.DataFrame):
                return data
            elif isinstance(data, dict):
                return pd.DataFrame([data])
            elif isinstance(data, basestring):  # Python 2 check; use str on Python 3
                local_file = self.work_folder + "/data"
                futil = fu.FileUtil(aws_key=self.aws_key,
                                    aws_secret=self.aws_secret)
                futil.copy(data, local_file)
                return self._convert_dataframe(local_file, df_format,
                                               csv_dates, index_col)
            elif isinstance(data, list):
                return np.array(data).reshape(1, -1)
            else:
                raise ValueError("unknown argument type for data")
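
For orientation, a minimal usage sketch of create_dataframe. The enclosing class is not shown in these snippets, so `util` below is a hypothetical instance of it (constructed roughly as in Example #2):

    import pandas as pd

    # `util` stands for an instance of the enclosing utility class (hypothetical here)
    df_from_dict = util.create_dataframe(data={"user": "u1", "score": 0.9})  # single-row frame
    df_passthrough = util.create_dataframe(data=df_from_dict)                # returned unchanged
    df_from_file = util.create_dataframe(data="s3://my-bucket/features.csv",
                                         df_format="csv",
                                         index_col=0)                        # copied locally, then parsed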
Example #2
 def __init__(self,
              input_folders=None,
              output_folder=None,
              models_folder=None,
              local_models_folder="./models",
              local_data_folder="./data",
              aws_key=None,
              aws_secret=None,
              data_type='json'):
     self.pipeline = []
     self.models_folder = models_folder
     # avoid sharing a mutable default list between instances
     self.input_folders = input_folders if input_folders is not None else []
     self.output_folder = output_folder
     self.local_models_folder = local_models_folder
     self.local_data_folder = local_data_folder
     if not os.path.exists(self.local_models_folder):
         os.makedirs(self.local_models_folder)
     if not os.path.exists(self.local_data_folder):
         os.makedirs(self.local_data_folder)
     self.fu = fu.FileUtil(key=aws_key, secret=aws_secret)
     self.logger = logging.getLogger('seldon')
     self.logger.setLevel(logging.DEBUG)
     ch = logging.StreamHandler()
     formatter = logging.Formatter(
         '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
     ch.setFormatter(formatter)
     self.logger.addHandler(ch)
     self.current_dataset = self.local_data_folder + "/current"
     self.next_dataset = self.local_data_folder + "/next"
     self.data_type = data_type
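
A hedged construction sketch for the __init__ above; the class name is not visible in the snippet, so `BasePipeline` is a placeholder for whichever class defines it:

    import os

    # `BasePipeline` is a placeholder name for the class whose __init__ is shown above
    p = BasePipeline(input_folders=["s3://my-bucket/day1", "s3://my-bucket/day2"],
                     output_folder="s3://my-bucket/out",
                     models_folder="s3://my-bucket/models",
                     aws_key=os.environ.get("AWS_ACCESS_KEY_ID"),
                     aws_secret=os.environ.get("AWS_SECRET_ACCESS_KEY"),
                     data_type="json")

The constructor creates the local model and data folders if needed and attaches a DEBUG-level console handler to the "seldon" logger.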
Example #3
    def save_dataframe(self, df, location, df_format="json", csv_index=True):
        """Save dataframe

        Parameters
        ----------

        df : pandas dataframe
           dataframe to save
        location : str
           external filesystem location to save to
        df_format : str
           format to use : json or csv
        csv_index : bool
           whether to save the index when outputting to csv
        """
        self.create_work_folder()
        tmp_file = self.work_folder + "/df_tmp"
        if df_format == 'csv':
            logger.info("saving dataframe as csv")
            df.to_csv(tmp_file, index=csv_index)
        else:
            logger.info("saving dataframe as json")
            with open(tmp_file, "w") as f:
                for i in range(df.shape[0]):
                    # irow() was removed from pandas; iloc is the replacement
                    row = df.iloc[i].dropna()
                    jNew = row.to_dict()
                    jStr = json.dumps(jNew, sort_keys=True)
                    f.write(jStr + "\n")
        futil = fu.FileUtil(aws_key=self.aws_key, aws_secret=self.aws_secret)
        futil.copy(tmp_file, location)
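
A usage sketch for save_dataframe, again with `util` as a hypothetical instance of the enclosing class; note that the default (non-csv) path writes one JSON object per line:

    import pandas as pd

    df = pd.DataFrame([{"user": "u1", "score": 0.9}, {"user": "u2", "score": 0.4}])
    # `util` is a hypothetical instance of the enclosing class
    util.save_dataframe(df, "s3://my-bucket/out/features", df_format="csv", csv_index=False)
    util.save_dataframe(df, "local_out/features")  # default df_format="json": one JSON object per line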
Example #4
 def stream_and_upload(self, folder):
     self.truncate_table()
     futl = fu.FileUtil()
     futl.stream(folder, self.upload)
     if len(self.inserts) > 0:
         logger.info("Running final batch with rows inserted %d", self.rows)
         self.reallyDoInserts(self.inserts)
     self.rename_table()
     self.db.commit()
Example #5
 def save_extension(self, extension, location):
     self.create_work_folder()
     rint = random.randint(1, 999999)
     extension_folder = self.work_folder + "/extension_tmp" + str(rint)
     if not os.path.exists(extension_folder):
         logger.info("creating folder %s", extension_folder)
         os.makedirs(extension_folder)
     tmp_file = extension_folder + "/ext"
     joblib.dump(extension, tmp_file)
     extension.save(extension_folder)
     futil = fu.FileUtil(aws_key=self.aws_key, aws_secret=self.aws_secret)
     futil.copy(extension_folder, location)
Example #6
 def load_extension(self, extension_folder):
     self.create_work_folder()
     rint = random.randint(1, 999999)
     local_extension_folder = self.work_folder + "/extension_tmp" + str(rint)
     if not os.path.exists(local_extension_folder):
         logger.info("creating folder %s", local_extension_folder)
         os.makedirs(local_extension_folder)
     futil = fu.FileUtil(aws_key=self.aws_key, aws_secret=self.aws_secret)
     futil.copy(extension_folder, local_extension_folder)
     extension = joblib.load(local_extension_folder + "/ext")
     extension.load(local_extension_folder)
     return extension
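
Examples #5 and #6 form a save/load pair: joblib pickles the extension object itself to "ext", while the extension's own save()/load() hooks can persist any extra artifacts alongside it. A hedged round-trip sketch, with `util` and `my_extension` as placeholders:

    # `util` is a hypothetical instance of the enclosing class;
    # `my_extension` must implement the save(folder)/load(folder) hooks used above
    util.save_extension(my_extension, "s3://my-bucket/models/extension")
    restored = util.load_extension("s3://my-bucket/models/extension")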
Example #7
 def _copy_features_locally(self, locations, local_file, df_format):
     self.df_format = df_format
     self.create_work_folder()
     logger.info("streaming features %s to %s", locations, local_file)
     logger.info("input type is %s", self.df_format)
     self.lines_read = 0
     self.active_file = open(local_file, "w")
     if not self.df_format == 'csv':
         self.active_file.write("[")
     futil = fu.FileUtil(aws_key=self.aws_key, aws_secret=self.aws_secret)
     futil.stream_multi(locations, self._save_features_local)
     if not self.df_format == 'csv':
         self.active_file.write("]")
     self.active_file.close()
     logger.info("finished stream of features")
Example #8
    def load_pipeline(self, pipeline_folder):
        """
        Load a scikit-learn pipeline from an external folder
        
        Parameters
        ----------

        pipeline_folder : str
           external folder holding pipeline
        """
        self.create_work_folder()
        local_pipeline_folder = self.work_folder + "/pipeline"
        if not os.path.exists(local_pipeline_folder):
            logger.info("creating folder %s", local_pipeline_folder)
            os.makedirs(local_pipeline_folder)
        futil = fu.FileUtil(aws_key=self.aws_key, aws_secret=self.aws_secret)
        futil.copy(pipeline_folder, local_pipeline_folder)
        return joblib.load(local_pipeline_folder + "/p")
Example #9
    def save_pipeline(self, pipeline, location):
        """
        Save a scikit-learn pipeline to an external location

        Parameters
        ----------

        pipeline : sklearn pipeline
           pipeline to be saved
        location : str
           external folder to save pipeline
        """
        self.create_work_folder()
        pipeline_folder = self.work_folder + "/pipeline"
        if not os.path.exists(pipeline_folder):
            logger.info("creating folder %s", pipeline_folder)
            os.makedirs(pipeline_folder)
        tmp_file = pipeline_folder + "/p"
        joblib.dump(pipeline, tmp_file)
        futil = fu.FileUtil(aws_key=self.aws_key, aws_secret=self.aws_secret)
        futil.copy(pipeline_folder, location)
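
Examples #8 and #9 round-trip a scikit-learn pipeline through joblib and the external store. A minimal sketch, with `util` as a placeholder instance of the enclosing class:

    from sklearn.feature_extraction import DictVectorizer
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import Pipeline

    skl_pipeline = Pipeline([("vec", DictVectorizer()), ("clf", LogisticRegression())])
    # `util` is a hypothetical instance of the enclosing class
    util.save_pipeline(skl_pipeline, "s3://my-bucket/models/pipeline")
    restored = util.load_pipeline("s3://my-bucket/models/pipeline")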
Example #10
    def load_recommender(self, recommender_folder):
        """
        Load recommender from external folder

        Parameters
        ----------

        recommender_folder : str
           external folder holding recommender
        """
        self.create_work_folder()
        rint = random.randint(1, 999999)
        local_recommender_folder = self.work_folder + "/recommender_tmp" + str(rint)
        if not os.path.exists(local_recommender_folder):
            logger.info("creating folder %s", local_recommender_folder)
            os.makedirs(local_recommender_folder)
        futil = fu.FileUtil(aws_key=self.aws_key, aws_secret=self.aws_secret)
        futil.copy(recommender_folder, local_recommender_folder)
        recommender = joblib.load(local_recommender_folder + "/rec")
        recommender.load(local_recommender_folder)
        return recommender
Example #11
    def save_recommender(self, recommender, location):
        """
        Save recommender to external location

        Parameters
        ----------

        recommender : Recommender 
           recommender to be saved
        location : str
           external folder to save recommender
        """
        self.create_work_folder()
        rint = random.randint(1, 999999)
        recommender_folder = self.work_folder + "/recommender_tmp" + str(rint)
        if not os.path.exists(recommender_folder):
            logger.info("creating folder %s", recommender_folder)
            os.makedirs(recommender_folder)
        tmp_file = recommender_folder + "/rec"
        joblib.dump(recommender, tmp_file)
        recommender.save(recommender_folder)
        futil = fu.FileUtil(aws_key=self.aws_key, aws_secret=self.aws_secret)
        futil.copy(recommender_folder, location)
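
As with the extension helpers, Examples #10 and #11 pair joblib serialisation of the recommender object ("rec") with the recommender's own save()/load() hooks for larger artifacts. A hedged round-trip sketch:

    # `util` is a hypothetical instance of the enclosing class;
    # `rec` is a Recommender implementing the save(folder)/load(folder) hooks used above
    util.save_recommender(rec, "s3://my-bucket/models/recommender")
    restored = util.load_recommender("s3://my-bucket/models/recommender")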