Example #1
0
    def data(self):
        """
        Get Selection data as a pandas.DataFrame

        The DataFrame is built on the first call and cached in `self._data`;
        later calls return the cached object. The source depends on
        `self.path`:

        - "MASTER", "ALL" or "CALCULATED": obtained from `query`, requesting
          the 'configname' and 'selected' columns.
        - a JSON file (when `self._is_json()` is true): read with
          `pandas.read_json(..., orient='records')`.
        - anything else: read as a text file with columns separated by one
          or more spaces; a leading '#' character, if present, is consumed
          before parsing.

        After loading, `self._clean_data()` is called and, unless `self.all`
        is true, only the rows where 'selected' == True are kept.

        If the data is modified, 'save' must be called for CASM to use the modified selection.

        Returns
        -------
        pandas.DataFrame
            The (cached) selection data.
        """
        if self._data is not None:
            return self._data

        if self.path in ["MASTER", "ALL", "CALCULATED"]:
            self._data = query(self.proj, ['configname', 'selected'],
                               self,
                               all=self.all)
        elif self._is_json():
            # The second positional parameter of read_json is `orient`, not a
            # file mode: passing 'r' there together with the `orient` keyword
            # raises TypeError, so only the keyword is given.
            self._data = pandas.read_json(self.path, orient='records')
        else:
            with open(self.path, compat.pandas_rmode()) as f:
                # Drop the leading '#' so it is not parsed as part of the
                # first line.
                if compat.peek(f) == '#':
                    f.read(1)
                self._data = pandas.read_csv(f,
                                             sep=compat.str(' +'),
                                             engine='python')

        self._clean_data()

        if not self.all:
            # Element-wise comparison on the column, not a truthiness test.
            self._data = self._data[self._data['selected'] == True]  # noqa: E712

        return self._data
Example #2
0
    def query(self, columns, force=False, verbose=False):
        """ Run a query for the requested columns and merge the results into 'data'.

        Columns already present in `self.data` are left untouched unless 'force'==True. The value of `self.all` is forwarded to the query, so all configurations (selected or not) are queried when `self.all == True`.

        Arguments
        ---------
        columns: List(str)
            Names of the properties to query; each one ends up as a column of `self.data`. This corresponds to the `-k` option of `casm query`. A list of options can be obtained from `casm query --help properties`.

        force: bool
            With `force==False`, entries of `columns` that are already in `self.data.columns` are skipped and keep their current values. With `force==True`, every entry of `columns` is queried and existing columns are overwritten.

        verbose: bool
            If true, progress messages are printed to stdout.
        """

        # Deliberately an equality test rather than `not force`, so that the
        # set of values treated as "do not force" stays exactly as before.
        keep_existing = (force == False)  # noqa: E712

        if keep_existing:
            to_query = [
                name for name in columns if name not in self.data.columns
            ]
        else:
            to_query = columns

        if verbose:
            print("# Query requested:", columns)
            label = "# Use existing:" if keep_existing else "# Overwrite existing:"
            print(label, [name for name in columns if name in self.data.columns])
            if len(to_query) != 0:
                print("# Querying:", to_query)
            else:
                print("# No query necessary")

        # Nothing left to ask for: leave `self.data` as it is.
        if len(to_query) == 0:
            return

        result = query(self.proj,
                       to_query,
                       self.path,
                       self.type,
                       verbatim=True,
                       all=self.all)

        if verbose:
            print("#   DONE\n")

        # The query output is copied in positionally, so the row counts of
        # the existing data and the query result have to agree.
        mismatch = "querying different numbers of records: {0}, {1}".format(
            self.data.shape, result.shape)
        assert self.data.shape[0] == result.shape[0], mismatch

        for name in result.columns:
            self.data.loc[:, name] = result.loc[:, name].values
Example #3
0
    def query(self, columns, force=False, verbose=False):
        """
        Query requested columns and store them in 'data'. Will not overwrite
        columns that already exist, unless 'force'==True.

        Will query data for all configurations, whether selected or not, if
        self.all == True.

        Arguments
        ---------
        columns: List(str)
            Names of the columns to query and add to `self.data`.

        force: bool
            If false, entries of `columns` already present in
            `self.data.columns` are skipped; if true, they are queried again
            and overwritten.

        verbose: bool
            If true, progress messages are printed to stdout.

        Raises
        ------
        AssertionError
            If the query result has a different number of rows than
            `self.data`.
        """

        if not force:
            _col = [x for x in columns if x not in self.data.columns]
        else:
            _col = columns

        # Each message is printed as one pre-formatted string, so the output
        # is the same whether `print` is the Python 2 statement or the
        # Python 3 function.
        if verbose:
            print("# Query requested: {0}".format(columns))
            existing = [x for x in columns if x in self.data.columns]
            if not force:
                print("# Use existing: {0}".format(existing))
            else:
                print("# Overwrite existing: {0}".format(existing))
            if len(_col) == 0:
                print("# No query necessary")
            else:
                print("# Querying: {0}".format(_col))

        # Nothing to query: leave `self.data` untouched.
        if len(_col) == 0:
            return

        df = query(self.proj, _col, self, all=self.all)

        if verbose:
            print("#   DONE\n")

        # Results are copied in positionally, so the row counts must agree.
        msg = "querying different numbers of records: {0}, {1}".format(
            self.data.shape, df.shape)
        assert self.data.shape[0] == df.shape[0], msg

        for c in df.columns:
            self.data.loc[:, c] = df.loc[:, c].values
Example #4
0
 def data(self):
     """
     Get Selection data as a pandas.DataFrame
     
     The DataFrame is built on the first call and cached in `self._data`;
     later calls return the cached object. For the paths "MASTER", "ALL"
     and "CALCULATED" the data comes from `query`; a JSON file (when
     `self._is_json()` is true) is read with `pandas.read_json`; any other
     path is read as a text file with columns separated by one or more
     spaces, skipping a leading '#' character if there is one.
     
     After loading, `self._clean_data()` is called and, unless `self.all`
     is true, only the rows where 'selected' == True are kept.
     
     If the data is modified, 'save' must be called for CASM to use the modified selection.
     """
     if self._data is None:
       if self.path in ["MASTER", "ALL", "CALCULATED"]:
         self._data = query(self.proj, ['configname', 'selected'], self)
       elif self._is_json():
         self._data = pandas.read_json(self.path, orient='records')
       else:
         # The context manager closes the file even if parsing fails.
         with open(self.path, 'r') as f:
           # Only discard the first character when it really is the '#'
           # marker; otherwise rewind so the first column name stays intact.
           if f.read(1) != '#':
             f.seek(0)
           # ' +' instead of ' *': a pattern that can match the empty string
           # makes re.split break between every character on Python 3.7+.
           self._data = pandas.read_csv(f, sep=r' +', engine='python')
       
       self._clean_data()
       
       if not self.all:
         # Element-wise comparison on the column, not a truthiness test.
         self._data = self._data[self._data['selected']==True]  # noqa: E712
       
     return self._data
Example #5
0
 def query(self, columns, force=False, verbose=False):
     """
     Query requested columns and store them in 'data'. Will not overwrite
     columns that already exist, unless 'force'==True.
     
     Will query data for all configurations, whether selected or not, if
     self.all == True.
     
     Arguments
     ---------
     columns: List(str)
         Names of the columns to query and add to `self.data`.
     
     force: bool
         If false, entries of `columns` already present in
         `self.data.columns` are skipped; if true, they are queried again
         and overwritten.
     
     verbose: bool
         If true, progress messages are printed to stdout.
     
     Raises
     ------
     AssertionError
         If the query result has a different number of rows than
         `self.data`.
     """
     
     if not force:
       _col = [x for x in columns if x not in self.data.columns]
     else:
       _col = columns
     
     # Each message is printed as one pre-formatted string, so the output
     # is the same whether `print` is the Python 2 statement or the
     # Python 3 function.
     if verbose:
       print("# Query requested: {0}".format(columns))
       existing = [x for x in columns if x in self.data.columns]
       if not force:
         print("# Use existing: {0}".format(existing))
       else:
         print("# Overwrite existing: {0}".format(existing))
       if len(_col) == 0:
         print("# No query necessary")
       else:
         print("# Querying: {0}".format(_col))
       
     # Nothing to query: leave `self.data` untouched.
     if len(_col) == 0:
       return
     
     df = query(self.proj, _col, self, all=self.all)
     
     if verbose:
       print("#   DONE\n")
     
     # Results are copied in positionally, so the row counts must agree.
     msg = "querying different numbers of records: {0}, {1}".format(
       self.data.shape, df.shape)
     assert self.data.shape[0] == df.shape[0], msg
     
     for c in df.columns:
       self.data.loc[:,c] = df.loc[:,c].values