Exemple #1
0
  def getUserLabels(self):
    """
    Load one CorpusLabel for every path found under self.labelDir.

    The labels are stored in self.userLabels and their count in
    self.nlabelers.

    Raises ValueError when no label paths are found; in that case
    self.userLabels has already been set to the empty list and
    self.nlabelers is left untouched.
    """
    loaded = []
    for labelPath in absoluteFilePaths(self.labelDir):
      loaded.append(CorpusLabel(labelPath, self.corpus))
    self.userLabels = loaded

    # An empty label set is treated as an error rather than silently accepted.
    if not self.userLabels:
      raise ValueError("No users labels found")

    self.nlabelers = len(self.userLabels)
Exemple #2
0
  def getDataFiles(self):
    """
    Collect all CSV data files from self.srcRoot directory.

    Only paths containing the substring ".csv" are wrapped in a DataFile.

    @return (dict)    Keys are relative paths (from self.srcRoot) and values are
                      the corresponding data files.

    NOTE(review): the visible excerpt ends right after the nested helper, before
    any return statement; the documented return value is presumably built in a
    part that is cut off here -- confirm against the full source.
    """
    filePaths = absoluteFilePaths(self.srcRoot)
    # Substring match: any path that contains ".csv" anywhere is accepted.
    dataSets = [DataFile(path) for path in filePaths if ".csv" in path]

    # Helper: return the part of srcPath that follows the first occurrence of
    # srcRoot, with leading/trailing OS separators stripped and the remaining
    # separators converted to "/". str.index raises ValueError when srcRoot
    # does not occur in srcPath.
    def getRelativePath(srcRoot, srcPath):
      return srcPath[srcPath.index(srcRoot)+len(srcRoot):]\
        .strip(os.path.sep).replace(os.path.sep, "/")
Exemple #3
0
    def getRawLabels(self):
        """
        Load the raw labels found under self.labelDir.

        Each path becomes a CorpusLabel. Paths containing the substring
        "known" go to self.knownLabels; all others go to self.userLabels.
        self.nLabelers is set to the number of user labels.

        Raises ValueError when no user labels were collected.
        """
        labelPaths = absoluteFilePaths(self.labelDir)
        self.userLabels = []
        self.knownLabels = []
        for labelPath in labelPaths:
            # Route on the path text before the label object is built.
            bucket = self.knownLabels if "known" in labelPath else self.userLabels
            bucket.append(CorpusLabel(labelPath, self.corpus))

        self.nLabelers = len(self.userLabels)
        if not self.nLabelers:
            raise ValueError("No users labels found")
Exemple #4
0
  def getRawLabels(self):
    """
    Gather raw labels from self.labelDir and split them into two lists.

    Every path yields a CorpusLabel. A path whose text contains "known" is
    stored in self.knownLabels; any other path is stored in self.userLabels.
    The user-label count is kept in self.nLabelers.

    Raises ValueError if the user-label list ends up empty.
    """
    labelPaths = absoluteFilePaths(self.labelDir)
    self.userLabels = []
    self.knownLabels = []
    for labelPath in labelPaths:
      isKnown = "known" in labelPath
      destination = self.knownLabels if isKnown else self.userLabels
      destination.append(CorpusLabel(labelPath, self.corpus))

    self.nLabelers = len(self.userLabels)
    # At least one user label is required.
    if not self.nLabelers:
      raise ValueError("No users labels found")
Exemple #5
0
  def getDataFiles(self):
    """
    Build a mapping of the CSV data files found under self.srcRoot.

    Paths containing the substring ".csv" are wrapped in DataFile objects.

    @return (dict)    Keys are each file's srcPath with everything up to and
                      including the first occurrence of self.srcRoot removed
                      and surrounding "/" stripped; values are the
                      corresponding DataFile objects.
    """
    csvPaths = (p for p in absoluteFilePaths(self.srcRoot) if ".csv" in p)
    # Construct every DataFile first; the keys are computed in a second pass.
    loaded = [DataFile(p) for p in csvPaths]

    byRelativePath = {}
    for dataFile in loaded:
      root = self.srcRoot
      fullPath = dataFile.srcPath
      # str.index raises ValueError if the root is not part of the path.
      start = fullPath.index(root) + len(root)
      byRelativePath[fullPath[start:].strip("/")] = dataFile
    return byRelativePath
Exemple #6
0
    def getDataFiles(self):
        """
        Collect the data files under self.srcRoot, keyed by relative path.

        Every path yielded for self.srcRoot is wrapped in a DataFile; no
        filtering by extension is applied.

        @return (dict)    Keys are each file's srcPath with everything up to
                          and including the first occurrence of self.srcRoot
                          removed and surrounding "/" stripped; values are the
                          corresponding DataFile objects.
        """
        loaded = [DataFile(p) for p in absoluteFilePaths(self.srcRoot)]

        keyed = {}
        for dataFile in loaded:
            root = self.srcRoot
            fullPath = dataFile.srcPath
            # str.index raises ValueError if the root is not part of the path.
            offset = fullPath.index(root) + len(root)
            keyed[fullPath[offset:].strip("/")] = dataFile
        return keyed
Exemple #7
0
    def getDataFiles(self):
        """
        Return the data files found under self.srcRoot as a dictionary.

        Each path yielded for self.srcRoot becomes a DataFile (no extension
        filter is applied).

        @return (dict)    Maps a relative path -- the file's srcPath after the
                          first occurrence of self.srcRoot, with surrounding
                          "/" stripped -- to its DataFile.
        """
        allPaths = absoluteFilePaths(self.srcRoot)
        loaded = [DataFile(p) for p in allPaths]

        def relativeKey(root, fullPath):
            # Drop everything up to and including the root prefix.
            cut = fullPath.index(root) + len(root)
            remainder = fullPath[cut:]
            return remainder.strip("/")

        keys = [relativeKey(self.srcRoot, item.srcPath) for item in loaded]
        # On duplicate keys, later entries replace earlier ones.
        return dict(zip(keys, loaded))
Exemple #8
0
    def getDataFiles(self):
        """
        Gather the CSV data files located under self.srcRoot.

        Paths containing the substring ".csv" are wrapped in DataFile objects.

        @return (dict)    Keys are "/"-separated paths relative to the
                          absolute form of self.srcRoot; values are the
                          corresponding DataFile objects.
        """
        csvPaths = [p for p in absoluteFilePaths(self.srcRoot) if ".csv" in p]
        loaded = [DataFile(p) for p in csvPaths]

        def toRelativeKey(root, fullPath):
            # The root may be given as a relative path; resolve it so it can
            # be located inside the file's path.
            absRoot = os.path.abspath(root)
            tail = fullPath[fullPath.index(absRoot) + len(absRoot):]
            # Normalise to forward slashes regardless of the OS separator.
            return tail.strip(os.path.sep).replace(os.path.sep, "/")

        result = {}
        for dataFile in loaded:
            result[toRelativeKey(self.srcRoot, dataFile.srcPath)] = dataFile
        return result
Exemple #9
0
#%%
# Preprocessing stage: run a Preprocessor over every raw file of each
# configured collection, then load the results through Corpus.
print('\n ==Preprocessing==========================================')
# Build one config dict per collection of datasets. The idea is that each
# collection groups datasets with similar formats. Each entry provides:
#   'folder'    - sub-folder appended to rawDir (also passed as folderStruct)
#   'delimiter' - passed to Preprocessor as `delimiter`
#   'timestamp' - passed to Preprocessor as `timestamp`
collections = {
    #"nab" : {'folder':'/nab/','delimiter':',','timestamp':True},
    "synth": {
        'folder': '/synth/',
        'delimiter': ',',
        'timestamp': False
    }
}

# Use the collections to create a Preprocessor for every file in each
# collection's folder and run its automatic preprocessing.
for c in collections:
    # The folder is joined to rawDir by plain string concatenation.
    rawPaths = absoluteFilePaths(rawDir + collections[c]['folder'])
    for f in rawPaths:
        print('\n **Preprocessing ' + f)
        pre = Preprocessor(fileName=f,
                           delimiter=collections[c]['delimiter'],
                           timestamp=collections[c]['timestamp'],
                           folderStruct=collections[c]['folder'])
        pre.autoPP()
print('\n ==Finished Preprocessing==================================')

# Load the preprocessed files via Corpus (nab.corpus, per the original note).
# NOTE(review): corpus.dataFiles is described as a dict with filenames as keys
# and dataframes as values; Corpus is defined outside this excerpt -- confirm.
print('\n Loading preprocessed data files to corpus')
corpus = Corpus(dataDir)
data = corpus.dataFiles
print('The following datasets will be analysed')
for d in data:
Exemple #10
0
 def getUserLabels(self):
   """
   Build a UserLabel for every path found under self.labelRoot.

   The labels are stored in self.userLabels and their count in
   self.nlabelers.
   """
   self.userLabels = [
     UserLabel(labelPath, corp=self.corpus)
     for labelPath in absoluteFilePaths(self.labelRoot)
   ]
   self.nlabelers = len(self.userLabels)