Ejemplo n.º 1
0
    def predict(self, descs=None, to_file=False):
        '''
        Run the network on a set of descriptors and return per-molecule results.

        descs:   descriptor values to predict on. When None, they are read
                 from '<self.directory>/descriptors/output.txt' through
                 __readDescs.
        to_file: currently unused; kept so existing callers keep working
                 (nothing in this method writes a file).

        return: list of [smiles, prediction, confidence] rows, one row per
                entry of self.smiles.

        Side effects: sets self.descs, self.predictor and
        self.prediction_results, and calls self.predict_conf().

        NOTE: self.smiles is read here but is not assigned by this method
        when descs is passed in, so it must already be set in that case.
        '''
        # Identity test on purpose: "descs == None" is evaluated element-wise
        # when descs is a numpy array, and the resulting array cannot be used
        # as an "if" condition.
        if descs is None:
            self.descs = self.__readDescs(self.directory + '/descriptors/output.txt')
        else:
            self.descs = descs

        self.predictor = Predictor(network_name=self.networks,
                                   predict_descs=self.descs,
                                   training_descs=self.trn,
                                   featureNorm=True)

        # Feature normalization with the scaler held by the predictor
        normedDescs = self.predictor.featureNorm(self.predictor.descsForPred,
                                                 self.predictor.X_scalar)
        prediction_results = self.predictor.predict(self.predictor.network, normedDescs)

        # Transform back (presumably the network outputs log-scaled values;
        # TODO confirm against the training code)
        self.prediction_results = np.exp(prediction_results)

        results_array = self.prediction_results.tolist()
        confidence = self.predict_conf()

        # One row per molecule, in the order of self.smiles
        return [[smiles, results_array[idx], confidence[idx]]
                for idx, smiles in enumerate(self.smiles)]
Ejemplo n.º 2
0
class NetPrediction:
    '''
    Predict values for molecules from their descriptors with a stored network.

    Resource files are looked up relative to the directory of the file that
    defines this class:
        nets/CED_new.xml             network definition handed to Predictor
        descriptors/trn_data_30.csv  training data
        descriptors/output.txt       default descriptors used by predict()

    Attributes set by the methods below:
        directory           directory containing this file
        networks            path of the network file
        trn                 training data as a numpy array
        descs               descriptors used for the last prediction
        smiles              'NAME' column of the last descriptors file read
        predictor           Predictor instance built by predict()
        prediction_results  np.exp() of the raw network output
        dist                distances returned by predictor.calcDist
        prediction_conf     list of 'Low' / 'High' labels
    '''

    # Distances strictly greater than this value are labelled 'Low' confidence.
    LOW_CONF_DISTANCE = 100

    def __init__(self):
        '''
        Locate the bundled resource files and load the training data.
        '''
        # Directory of the file in which this code lives
        class_directory = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
        self.directory = class_directory
        self.networks = self.directory + '/nets/CED_new.xml'
        self.trn = self.__readTrn(self.directory + '/descriptors/trn_data_30.csv')

    def __readDescs(self, raw_file):
        '''
        Clean up a descriptors file with pandas.

        Input: raw_file: whitespace-delimited descriptors file with a header
               row and the two non-descriptor columns 'No.' and 'NAME'
        return: numpy array holding only the descriptor values

        Side effect: stores the 'NAME' column in self.smiles.
        '''
        # sep=r'\s+' is the documented replacement for the deprecated
        # delim_whitespace=True and behaves the same on older pandas.
        df = pd.read_csv(raw_file, sep=r'\s+')
        # Fill missing values with the column mean. numeric_only=True keeps
        # the text column 'NAME' out of the mean, which recent pandas would
        # otherwise reject.
        df = df.fillna(df.mean(numeric_only=True))
        # axis must be passed by keyword: the positional form was removed in
        # pandas 2.0.
        df = df.drop('No.', axis=1)
        self.smiles = df['NAME']
        df = df.drop('NAME', axis=1)
        return df.values

    def __readTrn(self, trn_data):
        '''
        Read the training data CSV (first row is the header) and return its
        values as a numpy array.
        '''
        df = pd.read_csv(trn_data)
        return df.values

    def predict(self, descs=None, to_file=False):
        '''
        Run the network on a set of descriptors and return per-molecule results.

        descs:   descriptor values to predict on. When None, they are read
                 from '<self.directory>/descriptors/output.txt'.
        to_file: currently unused; kept so existing callers keep working.
                 Call to_file() after predict() to write the results.

        return: list of [smiles, prediction, confidence] rows, one row per
                entry of self.smiles.

        NOTE: self.smiles is only assigned when a descriptors file is read,
        so it must already be set when descs is passed in.
        '''
        # Identity test on purpose: "descs == None" is evaluated element-wise
        # when descs is a numpy array, and the resulting array cannot be used
        # as an "if" condition.
        if descs is None:
            self.descs = self.__readDescs(self.directory + '/descriptors/output.txt')
        else:
            self.descs = descs

        self.predictor = Predictor(network_name=self.networks,
                                   predict_descs=self.descs,
                                   training_descs=self.trn,
                                   featureNorm=True)

        # Feature normalization with the scaler held by the predictor
        normedDescs = self.predictor.featureNorm(self.predictor.descsForPred,
                                                 self.predictor.X_scalar)
        prediction_results = self.predictor.predict(self.predictor.network, normedDescs)

        # Transform back (presumably the network outputs log-scaled values;
        # TODO confirm against the training code)
        self.prediction_results = np.exp(prediction_results)

        results_array = self.prediction_results.tolist()
        confidence = self.predict_conf()

        # One row per molecule, in the order of self.smiles
        return [[smiles, results_array[idx], confidence[idx]]
                for idx, smiles in enumerate(self.smiles)]

    def predict_conf(self):
        '''
        Prediction confidence level.

        Labels each entry of predictor.calcDist(self.descs, self.trn) as
        'Low' when it exceeds LOW_CONF_DISTANCE and 'High' otherwise.
        (Per the original notes the distance is measured to the centroid of
        the training data; calcDist itself is defined elsewhere.)

        return: list of 'Low' / 'High' strings, also stored in
                self.prediction_conf. The distances are kept in self.dist.
        '''
        assert self.prediction_results is not None
        assert self.trn is not None

        self.dist = self.predictor.calcDist(self.descs, self.trn)
        conf = ['Low' if eachDist > self.LOW_CONF_DISTANCE else 'High'
                for eachDist in self.dist]

        self.prediction_conf = conf
        return conf

    def to_file(self):
        '''
        Combine the outputs of the last prediction and write them to
        'prediction_results.csv' in the current working directory.

        Columns: the names in self.smiles, 'prediction results' and
        'prediction confidence'. An existing file of that name is overwritten.
        '''
        assert self.prediction_results is not None

        outputs = pd.DataFrame(self.smiles)
        outputs['prediction results'] = self.prediction_results
        outputs['prediction confidence'] = self.prediction_conf
        outputs.to_csv('prediction_results.csv')
        # Single-argument call form prints the same text on Python 2 and 3.
        print('Prediction results output to "prediction_results.csv". Make a copy if you want to save the results')