Ejemplo n.º 1
0
    def __init__(self, *args, **kwargs):
        """Load the sheet and reduce it to the ID column plus the IPAQ fields.

        All arguments are forwarded unchanged to ``DataParser.__init__``.

        Raises:
            ValueError: if ``self.data`` is still ``None`` after the base
                initialiser has run.
        """
        DataParser.__init__(self, *args, **kwargs)
        if self.data is None:
            raise ValueError('IPAQ Parser: Data not loaded')
        self.type = ''

        # Discard columns that hold no values at all.
        self.data.dropna(axis=1, how='all', inplace=True)

        ipaq_fields = [
            'sitting',
            'walking_days', 'walking_time',
            'moderate_days', 'moderate_time',
            'vigorous_days', 'vigorous_time',
            'pa', 'mvpa',
        ]
        self.fields = ipaq_fields
        wanted = ['ID'] + ipaq_fields
        self.data = self.data[wanted]
        # stripspaces is called once per row with position 0.
        self.data['ID'] = self.data.apply(
            lambda row: stripspaces(row, 0), axis=1)
        self.sortSubjects('ID')
        print('Data load complete')

        # Fall back to built-in experiment info when none was provided.
        if self.info is None:
            self.info = {'prefix': 'IP', 'xsitype': 'opex:ipaq'}
Ejemplo n.º 2
0
 def __init__(self, *args):
     """Load the DEXA sheet, relabel its columns and split it per interval.

     Arguments are forwarded to ``DataParser.__init__``. Column labels are
     read from the ``dexa_header`` sheet of ``dexa_fields.xlsx`` in
     ``self.resource_dir``. One frame per interval (keyed 0, 3, 6, 9, 12)
     is stored in ``self.df``; each holds the first four (subject) columns,
     a ``SubjectID`` column and the interval's columns with the interval
     prefix removed.

     Raises:
         ValueError: if the fields workbook cannot be read.
     """
     DataParser.__init__(self, *args)
     fields = join(self.resource_dir, "dexa_fields.xlsx")
     if not access(fields, R_OK):
         raise ValueError("Cannot access fields file: %s" % fields)

     # Replace field headers (``sheet_name`` is the supported keyword;
     # the old ``sheetname`` spelling was removed from pandas).
     self.fields = pd.read_excel(fields, header=0, sheet_name='dexa_fields')
     df_header = pd.read_excel(fields, header=0, sheet_name='dexa_header')
     self.header = df_header['concatenated'].tolist()
     self.data.columns = self.header
     print(("Loaded rows=", len(self.data['ID'])))

     # Extract subject info. Copy first so that adding a column does not
     # write into a slice of self.data.
     df_subj = self.data.iloc[:, 0:4].copy()
     df_subj['SubjectID'] = df_subj.apply(
         lambda x: stripspaces(x, 'ID'), axis=1)

     # Split data into intervals
     self.intervals = {0: 'BASELINE', 3: 'MIDPOINT', 6: 'ENDPOINT',
                       9: 'MID-FOLLOW-UP', 12: 'FOLLOW-UP'}
     self.df = dict()
     subj_cols = df_subj.columns.tolist()
     for i, intval in self.intervals.items():
         cols = [c for c in self.header if c.startswith(intval)]
         # Drop everything up to and including the first underscore.
         simplecols = [c.partition("_")[2] for c in cols]
         frame = pd.concat([df_subj, self.data[cols]], axis=1)
         frame.columns = subj_cols + simplecols
         self.df[i] = frame
         if DEBUG:
             msg = "Interval=%s data=%d" % (intval, len(frame))
             print(msg)
     self.sortSubjects('SubjectID')
Ejemplo n.º 3
0
 def __init__(self, *args):
     """Initialise the base parser, reset bookkeeping and sort subjects.

     Arguments are forwarded to ``DataParser.__init__``. ``dates`` and
     ``subjects`` start as empty dicts and ``interval`` as ``None`` before
     ``sortSubjects`` is called with no arguments.
     """
     DataParser.__init__(self, *args)
     self.dates = {}
     self.subjects = {}
     self.interval = None
     self.sortSubjects()
Ejemplo n.º 4
0
    def __init__(self, *args):
        """Keep only the depression/anxiety/stress columns for each interval.

        Arguments are forwarded to ``DataParser.__init__``. The first column
        becomes the index. From column 3 onwards, nine columns are dropped
        out of every twelve, and the remaining columns are relabelled
        ``<field>_<interval>`` for intervals 0, 3, 6, 9 and 12. The result is
        indexed by ``ID`` and has a ``SubjectID`` column copied from that
        index.
        """
        DataParser.__init__(self, *args)
        # Replace field headers
        self.fields = ['depression', 'anxiety', 'stress']
        ncols = [c + '_' + str(ix)
                 for ix in range(0, 13, 3)
                 for c in self.fields]

        self.data.set_index(list(self.data)[0], inplace=True)

        # Collect the check columns to remove.
        allcols = self.data.columns.tolist()
        dropcols = []
        for start in range(3, len(allcols), 12):
            dropcols += allcols[start:start + 9]
        print(('Selecting Totals columns for ', dropcols))
        df = self.data.drop(columns=dropcols)

        # Delete surplus trailing columns in case blank ones were included.
        # The ID column is the index at this point, so the comparison is
        # against len(ncols) alone; counting 'ID' as well let exactly one
        # surplus column through and the relabel below then failed.
        if len(ncols) < len(df.columns):
            df = df.iloc[:, :len(ncols)].copy()

        df.reset_index(inplace=True)
        df.columns = ['ID'] + ncols
        df.set_index('ID', inplace=True)

        self.data = df
        # sort subjects
        self.data['SubjectID'] = self.data.index
        self.sortSubjects('SubjectID')
Ejemplo n.º 5
0
    def __init__(self, *args):
        """Clean the Godin sheet and rename its columns to the field names.

        Arguments are forwarded to ``DataParser.__init__``. Only the first
        seven columns are kept, rows with fewer than five non-NA values are
        removed, remaining NA values become 999, and the source columns are
        renamed (``ID`` becomes ``SubjectID``).
        """
        DataParser.__init__(self, *args)
        # cleanup subjects
        self.data['ID'] = self.data.apply(lambda x: stripspaces(x, 0), axis=1)
        if self.info is None:
            self.info = {'prefix': 'GDN', 'xsitype': 'opex:godin'}

        # Replace field headers
        self.fields = ['strenuous', 'moderate', 'light', 'total', 'sweat']
        cols = [
            'ID', 'Strenuous', 'Moderate', 'Light',
            'Totalleisureactivityscore', 'Sweat(1,2,or3)'
        ]
        renamecols = dict(zip(cols, ['SubjectID'] + self.fields))

        # Work on new frames rather than mutating a slice of self.data.
        # Only ``thresh`` is passed: pandas rejects ``how`` and ``thresh``
        # together, and older versions ignored ``how`` when ``thresh`` was
        # given, so the row filter is unchanged.
        df = self.data.iloc[:, 0:7].dropna(axis=0, thresh=5)
        df = df.fillna(999)  # replace any remaining na with 999
        df = df.rename(columns=renamecols)
        self.data = df
        # sort subjects
        self.sortSubjects('SubjectID')
        print('Data load complete')
Ejemplo n.º 6
0
    def __init__(self, inputdir, fieldsfile='fmri.xlsx', *args):
        """Record the input locations, then look up, load and sort the data.

        Args:
            inputdir: stored as ``self.inputdir``.
            fieldsfile: file name resolved under ``<resource_dir>/fields``.
            *args: forwarded to ``DataParser.__init__``.
        """
        DataParser.__init__(self, *args)
        self.inputdir = inputdir
        fields_dir = os.path.join(self.resource_dir, 'fields')
        self.fieldsfile = os.path.join(fields_dir, fieldsfile)

        # Order matters: lookup, then getdata, then sorting.
        self.lookup()
        self.getdata()
        self.sortSubjects('Subject')
Ejemplo n.º 7
0
 def __init__(self, *args):
     """Initialise the base parser, remap the fields and sort subjects.

     Arguments are forwarded to ``DataParser.__init__``. Any exception from
     ``cantabNewFields`` or ``sortSubjects`` is printed and then re-raised
     to the caller.
     """
     DataParser.__init__(self, *args)
     try:
         self.cantabNewFields()
         self.sortSubjects('Participant ID')
     except Exception as e:
         print(e)
         # Bare raise re-raises the active exception with its traceback.
         raise
Ejemplo n.º 8
0
 def __init__(self, *args):
     """Initialise the base parser, set the activity fields, sort subjects.

     Arguments are forwarded to ``DataParser.__init__``.
     """
     DataParser.__init__(self, *args)
     # Replace field headers
     activity_fields = [
         'calories_burned', 'steps', 'distance', 'floors',
         'min_sed', 'min_lightact', 'min_fairact', 'min_veryact',
         'act_calories',
     ]
     self.fields = activity_fields
     self.sortSubjects('Subject')
     print('Data load complete')
Ejemplo n.º 9
0
 def __init__(self, inputdir, filename, *args):
     """Store the input location, set the question fields, score and sort.

     Args:
         inputdir: stored as ``self.inputdir``.
         filename: stored as ``self.filename``.
         *args: forwarded to ``DataParser.__init__``.
     """
     DataParser.__init__(self, *args)
     self.inputdir = inputdir
     self.filename = filename
     self.get_date()
     # Question fields q1 .. q41, plus three comment fields.
     self.fields = ['q{}'.format(num) for num in range(1, 42)]
     self.fields_comments = ['q42' + suffix for suffix in 'abc']
     self.score_fcas()
     self.sortSubjects(subjectfield='Subject')
Ejemplo n.º 10
0
    def __init__(self, *args):
        """Select the score columns and relabel them with the field names.

        Arguments are forwarded to ``DataParser.__init__``. The frame is
        reduced to ``ID``, ``TimePoint`` and seven score columns, which are
        relabelled ``ID``, ``interval`` and the entries of ``self.fields``.
        """
        DataParser.__init__(self, *args)

        source_cols = [
            "AttentionOrientation", "Memory", "Fluency", "Language",
            "Visuospatial", "MMSE", "ACERTotal",
        ]
        self.fields = [
            'attention', 'memory', 'fluency', 'language',
            'visuospatial', 'MMSE', 'total',
        ]
        subset = self.data[['ID', 'TimePoint'] + source_cols]
        subset.columns = ['ID', 'interval'] + self.fields
        self.data = subset
        self.sortSubjects()
Ejemplo n.º 11
0
 def setUp(self):
     """Build a ``DataParser`` for the MULTIPLEX sample workbook.

     Any exception while building the parser is printed and ``self.dp``
     is left as ``None``.
     """
     # Assign before the try so the attribute exists even when an early
     # statement inside it raises.
     self.dp = None
     try:
         datafile = join(ROOTDATADIR, 'blood', 'MULTIPLEX',
                         '2018-02-01 1058VB 1021LB 1107 1114.xlsx')
         sheet = 0
         skip = 1
         header = None
         etype = 'MULTIPLEX'
         self.dp = DataParser(datafile, sheet, skip, header, etype)
     except Exception as e:
         print(e)
Ejemplo n.º 12
0
    def __init__(self, inputdir, *args):
        """Derive type and interval from the input path, then parse and sort.

        Args:
            inputdir: stored as ``self.inputdir``. The name of its parent
                directory becomes ``self.etype`` and its own base name minus
                the last character becomes ``self.interval``.
            *args: forwarded to ``DataParser.__init__``.
        """
        DataParser.__init__(self, *args)

        self.inputdir = inputdir
        self.etype = basename(dirname(self.inputdir))
        self.interval = basename(inputdir)[:-1]

        regions = [
            'CA1', 'CA2', 'DG', 'CA3', 'misc', 'SUB',
            'ERC', 'BA35', 'BA36', 'PHC', 'sulcus', 'Hippoc',
        ]
        # All left_* names first, then all right_* names.
        sided = [side + '_' + region
                 for side in ('left', 'right')
                 for region in regions]
        self.fields = ['icv'] + sided + ['Total_Hippoc']

        self.parse()
        self.sortSubjects('Subject')
Ejemplo n.º 13
0
    def __init__(self, *args):
        """Clean the ID column and rename the result columns.

        Arguments are forwarded to ``DataParser.__init__``. The columns
        ``CurrentResult``, ``PastResult`` and ``TotalResult`` are renamed in
        place to ``current``, ``past`` and ``total``.
        """
        DataParser.__init__(self, *args)
        # cleanup subjects
        self.data['ID'] = self.data.apply(
            lambda row: stripspaces(row, 0), axis=1)

        # Replace field headers
        self.fields = ['current', 'past', 'total']
        source_cols = ['CurrentResult', 'PastResult', 'TotalResult']
        mapping = dict(zip(source_cols, self.fields))
        self.data.rename(columns=mapping, inplace=True)
        self.sortSubjects('ID')
        print('Data load complete')
Ejemplo n.º 14
0
    def __init__(self, *args):
        """Load the accelerometry field list and reduce the data to it.

        Arguments are forwarded to ``DataParser.__init__``. Field names are
        read from the ``ACCELEROMETRY`` column of
        ``resources/fields/accel_fields.csv``, resolved relative to the
        current working directory. ``self.type`` and ``self.location`` are
        the names of the data file's parent and grandparent directories.
        For type ``'month'`` the ``day`` and ``valid_day`` fields are
        left out.
        """
        DataParser.__init__(self, *args)

        # Build the path from components so it works on any OS; the
        # previous literal used a backslash separator.
        fields_csv = join('resources', 'fields', 'accel_fields.csv')
        fields = pd.read_csv(fields_csv)['ACCELEROMETRY'].values.tolist()

        self.type = basename(dirname(self.datafile))
        self.location = basename(dirname(dirname(self.datafile)))
        print(self.location)
        if self.type == 'month':
            fields = [f for f in fields if f not in ['day', 'valid_day']]

        self.fields = fields
        self.data = self.data[['Subject', 'interval'] + self.fields]
        self.sortSubjects(subjectfield='Subject')
Ejemplo n.º 15
0
class TestDataparser(unittest.TestCase):
    """Checks on ``DataParser`` loaded with the MULTIPLEX sample workbook.

    Every test is a no-op when the parser could not be built.
    """

    def setUp(self):
        """Build the parser; on any failure print it and leave dp as None."""
        # Assign before the try so tearDown and the tests can always read
        # the attribute, even when an early statement inside it raises.
        self.dp = None
        try:
            datafile = join(ROOTDATADIR, 'blood', 'MULTIPLEX',
                            '2018-02-01 1058VB 1021LB 1107 1114.xlsx')
            sheet = 0
            skip = 1
            header = None
            etype = 'MULTIPLEX'
            self.dp = DataParser(datafile, sheet, skip, header, etype)
        except Exception as e:
            print(e)

    def tearDown(self):
        """Close the parser's database connection if a parser was built."""
        if self.dp is not None:
            self.dp.dbi.closeconn()

    def test_info(self):
        """``info`` holds the MULTIPLEX prefix and xsitype."""
        if self.dp is not None:
            expected = {
                'prefix': u'MPX',
                'xsitype': u'opex:bloodMultiplexData'
            }
            self.assertDictEqual(expected, self.dp.info)

    def test_fields(self):
        """``fields`` lists the eight MULTIPLEX analytes in order."""
        if self.dp is not None:
            expected = [
                u'GH', u'Leptin', u'BDNF', u'IGFBP7', u'IL1', u'IL4', u'IL6',
                u'IL10'
            ]
            self.assertListEqual(expected, self.dp.fields)

    def test_prefix(self):
        """``getPrefix`` returns ``'MPX'``."""
        if self.dp is not None and self.dp.info is not None:
            data = self.dp.getPrefix()
            expected = 'MPX'
            self.assertEqual(expected, data)

    def test_xsd(self):
        """``getxsd`` returns the MULTIPLEX xsitype."""
        if self.dp is not None and self.dp.info is not None:
            data = self.dp.getxsd()
            expected = 'opex:bloodMultiplexData'
            self.assertEqual(expected, data)
Ejemplo n.º 16
0
    def __init__(self, *args, **kwargs):
        """Clean the insomnia sheet and relabel it with the field names.

        Only ``*args`` is forwarded to ``DataParser.__init__``; ``**kwargs``
        is accepted but not used here.

        Raises:
            ValueError: if the loaded frame is empty or has at most one
                column.
        """
        DataParser.__init__(self, *args)
        # Maybe empty sheet
        if self.data.empty or len(self.data.columns) <= 1:
            raise ValueError("No data available")
        # cleanup subjects
        self.data['ID'] = self.data.apply(
            lambda row: stripspaces(row, 0), axis=1)

        if self.info is None:
            self.info = {'prefix': 'INS', 'xsitype': 'opex:insomnia'}

        # Replace field headers: field name -> source column label.
        self.fieldmap = {
            'q1': 'Q1', 'q2': 'Q2', 'q3': 'Q3', 'q4': 'Q4',
            'q5': 'Q5', 'q6': 'Q6', 'q7': 'Q7',
            'total': 'TotalScore',
        }
        ordered_keys = ['q1', 'q2', 'q3', 'q4', 'q5', 'q6', 'q7', 'total']
        cols = ['ID'] + [self.fieldmap[key] for key in ordered_keys]
        self.fields = list(self.fieldmap.keys())
        ncols = ['SubjectID'] + self.fields

        # zeros have been entered when should be blank
        total_col = self.fieldmap['total']
        self.data[total_col] = self.data.apply(
            lambda row: self.nodatarow(row, self.fieldmap['total']), axis=1)
        self.data[cols[1:]] = self.data.apply(
            lambda row: self.nodatarow(row, cols[1:]), axis=1)

        df = self.data[cols].astype(object)  # convert to object
        df.columns = ncols
        # NOTE(review): the result of reindex() is not assigned anywhere.
        df.reindex()

        self.data = df
        # sort subjects
        self.sortSubjects('SubjectID')
        print('Data load complete')
Ejemplo n.º 17
0
    def __init__(self, *args):
        """Derive Subject and interval from ``subjname`` and select fields.

        Arguments are forwarded to ``DataParser.__init__``. ``Subject`` is
        the text between ``sub-`` and the last underscore of ``subjname``;
        ``interval`` is the integer after ``ses-``. Three columns are
        renamed, and only those of ``self.fields`` present in the frame are
        kept, after ``Subject`` and ``interval``.
        """
        DataParser.__init__(self, *args)

        subject_re = re.compile('(?<=sub-)(.*)(?=_)')
        session_re = re.compile('(?<=ses-)(.*)')

        frame = self.data
        names = frame['subjname']
        frame['Subject'] = names.apply(
            lambda name: subject_re.findall(name)[0])
        frame['interval'] = names.apply(
            lambda name: int(session_re.findall(name)[0]))

        frame.rename(
            columns={
                'ICV': 'icv',
                'right_subiculum': 'right_SUB',
                'left_subiculum': 'left_SUB',
            },
            inplace=True)

        self.cols = [name for name in self.fields if name in frame.columns]
        self.data = frame[['Subject', 'interval'] + self.cols]
        self.sortSubjects('Subject')
Ejemplo n.º 18
0
    def __init__(self, *args):
        """Split ``RecordID`` into Subject and interval, then sort subjects.

        Arguments are forwarded to ``DataParser.__init__``. ``RecordID`` is
        removed from the frame and split at its first whitespace character.
        The left part becomes ``Subject``; the right part is passed through
        ``extract_interval`` and becomes ``interval``.
        """
        DataParser.__init__(self, *args)

        self.fields = ['PF', 'RP', 'BP', 'GH', 'VT', 'SF', 'RE', 'MH',
                       'PCS', 'MCS']

        df = self.data
        # ``n`` is passed by keyword: it is keyword-only in current pandas,
        # and older versions accept the keyword form as well.
        df[['Subject', 'interval']] = df.pop('RecordID').str.split(
            r'\s', n=1, expand=True)
        df['interval'] = df['interval'].apply(extract_interval)
        self.data = df

        self.sortSubjects('Subject')
Ejemplo n.º 19
0
 def setUp(self):
     """Build a ``DataParser`` for the CANTAB sample CSV.

     Any exception while building the parser is printed and ``self.dp``
     is left as ``None``.
     """
     # Assign before the try so the attribute exists even when an early
     # statement inside it raises.
     self.dp = None
     try:
         datafile = join(ROOTDATADIR, 'cantab',
                         'RowBySession_HealthyBrains_20180504.csv')
         sheet = 0
         skip = 0
         header = None
         etype = 'CANTAB'
         self.dp = DataParser(datafile, sheet, skip, header, etype)
     except Exception as e:
         print(e)
Ejemplo n.º 20
0
    def __init__(self, *args):
        """Clean the ID column and rename the question columns.

        Arguments are forwarded to ``DataParser.__init__``. Columns ``Q1`` to
        ``Q8``, ``SumTotal`` and ``%Enjoyment`` are renamed in place to
        ``q1`` to ``q8``, ``total`` and ``enjoy_percent``.
        """
        DataParser.__init__(self, *args)
        # cleanup subjects
        self.data['ID'] = self.data.apply(
            lambda row: stripspaces(row, 0), axis=1)

        if self.info is None:
            # NOTE(review): prefix 'GDN' is paired with xsitype 'opex:paces'
            # here - confirm the prefix is intended.
            self.info = {'prefix': 'GDN', 'xsitype': 'opex:paces'}

        # Replace field headers
        question_fields = ['q{}'.format(num) for num in range(1, 9)]
        self.fields = question_fields + ['total', 'enjoy_percent']
        source_cols = ['Q{}'.format(num) for num in range(1, 9)]
        source_cols += ['SumTotal', '%Enjoyment']
        mapping = dict(zip(source_cols, self.fields))

        self.data.rename(columns=mapping, inplace=True)

        self.sortSubjects('ID')
        print('Data load complete')
Ejemplo n.º 21
0
 def __init__(self, *args):
     """Reduce the PSQI sheet to ID, component and total columns.

     Arguments are forwarded to ``DataParser.__init__``. The frame keeps
     ``ID``, every string-labelled column starting with ``Component`` and
     ``total``. These are relabelled ``SubjectID``, ``c1`` to ``c7`` and
     ``total``.

     Raises:
         ValueError: if the loaded frame is empty or has at most one
             column.
     """
     DataParser.__init__(self, *args)
     # Maybe empty sheet
     if self.data.empty or len(self.data.columns) <= 1:
         msg = "No data available"
         raise ValueError(msg)
     # cleanup subjects
     self.data['ID'] = self.data.apply(lambda x: stripspaces(x, 0), axis=1)
     if self.info is None:
         self.info = {'prefix': 'PSQ', 'xsitype': 'opex:psqi'}
     # Replace field headers
     self.fields = ['c' + str(i) for i in range(1, 8)] + ['total']
     ncols = ['SubjectID'] + self.fields
     # Column labels may be non-strings, so check the type before calling
     # startswith.
     components = [c for c in self.data.columns
                   if isinstance(c, str) and c.startswith('Component')]
     cols = ['ID'] + components + ['total']
     df = self.data[cols]
     df.columns = ncols
     self.data = df
     # sort subjects
     self.sortSubjects('SubjectID')
     print('Data load complete')
Ejemplo n.º 22
0
    def __init__(self, inputdir, inputsubdir, datafile, testonly=False):
        """Load the COSMED field definitions, efficiency data and files.

        Args:
            inputdir: stored as ``self.inputdir``.
            inputsubdir: directory globbed for ``*.xlsx`` files; a
                ``processed`` subdirectory is created in it if missing.
            datafile: passed to the efficiency-data loader.
            testonly: stored as ``self.testonly``.
        """
        DataParser.__init__(self, etype='COSMED')
        self.inputdir = inputdir
        self.testonly = testonly

        # Load fields: one attribute per sheet of the fields workbook.
        fieldsfile = join(self.getResourceDir(), "cosmed_fields.xlsx")
        sheet_by_attr = (
            ('subjectdataloc', 'cosmed'),
            ('fields', 'cosmed_xnat'),
            ('datafields', 'cosmed_data'),
        )
        for attr, sheet in sheet_by_attr:
            setattr(self, attr,
                    pd.read_excel(fieldsfile, header=0, sheet_name=sheet))

        # Get list of subjects - parse individual files
        self.subjects = {}
        self.files = glob.glob(join(inputsubdir, "*.xlsx"))

        # create an output dir for processed files
        pdir = join(inputsubdir, 'processed')
        if not isdir(pdir):
            mkdir(pdir)

        # Load efficiency data from single file
        self.effdata_cols = {
            '0': [9, 12], '3': [13, 16], '6': [17, 20],
            '9': [21, 24], '12': [25, 28],
        }
        self.effdata = self.__loadEfficiencydata(datafile)

        # Load data from files
        self.loaded = self.__loadData()
Ejemplo n.º 23
0
 def __init__(self, *args):
     """Open the workbook, read the diary type from its name, load data.

     Arguments are forwarded to ``DataParser.__init__``. ``self.type`` is
     the three capital letters following ``Diary-`` in the data file's
     base name.
     """
     DataParser.__init__(self, *args)
     self.training_file = pd.ExcelFile(self.datafile)
     diary_codes = re.findall('(?<=Diary-)[A-Z]{3}', basename(self.datafile))
     self.type = diary_codes[0]
     # One extraction function per diary type.
     self.extraction = dict(AIT=extract_AIT, MIT=extract_MIT, LIT=extract_LIT)
     self.getData()
Ejemplo n.º 24
0
    def __init__(self, inputdir, *args):
        """Store the input directory, load the data and sort subjects.

        Args:
            inputdir: stored as ``self.inputdir``.
            *args: forwarded to ``DataParser.__init__``.
        """
        DataParser.__init__(self, *args)
        self.inputdir = inputdir

        subject_column = 'Subject'
        self.getData()
        self.sortSubjects(subject_column)
Ejemplo n.º 25
0
 def __init__(self, *args):
     """Initialise the base parser, load the data and sort subjects.

     Arguments are forwarded to ``DataParser.__init__``.
     """
     DataParser.__init__(self, *args)
     subject_column = 'Subject'
     self.get_data()
     self.sortSubjects(subject_column)
Ejemplo n.º 26
0
    def __init__(self, *args, **kwargs):
        """Load a blood-sample sheet, normalise headers and sort subjects.

        ``*args`` is forwarded to ``DataParser.__init__``. If ``type`` is
        given as a keyword it selects the sample type, and fields and info
        are fetched through ``self.dbi``. Otherwise ``self.etype`` is used
        when it is set.

        For the known types (IGF, SOMATO, BDNF, MULTIPLEX, INFLAM, ELISAS,
        COBAS) the columns are renamed and the index is converted to
        strings. Any other type leaves the frame as loaded.

        Raises:
            ValueError: if no data was loaded, or if the
                ``Participant ID`` column is missing after renaming.
        """
        DataParser.__init__(self, *args)
        if self.data is None:
            raise ValueError('BloodParser: Data not loaded')
        self.type = ''
        if 'type' in kwargs:
            self.type = kwargs.get('type')
            self.fields = self.dbi.getFields(self.type)
            self.info = self.dbi.getInfo(self.type)
        elif self.etype is not None:
            self.type = self.etype
        print('Rename Headers for ', self.type)

        # Renames shared by every known type.
        common = {'Date': 'A_Date',
                  'Participant ID ': 'Participant ID',
                  'Timepoint': 'Sample ID'}
        # Type-specific renames applied on top of the shared ones.
        extras = {
            'IGF': {'IGF-1': 'IGF1'},
            'SOMATO': {'Somatostatin': 'somatostatin'},
            'BDNF': {},
            'MULTIPLEX': {'IGFBP-7': 'IGFBP7'},
            'INFLAM': {u'IFN\u03b3': 'ifngamma',
                       'IL-10': 'il10',
                       'IL-12(p70)': 'il12p70',
                       u'IL-1\u03b2': 'il1beta',
                       'IL-6': 'il6',
                       'IL-8': 'il8cxcl8',
                       u'TNF\u03B1': 'tnfalpha'},
            'ELISAS': {'Beta-H (ng/ul)': 'BetaHydroxy'},
        }

        colnames = None
        if self.type == 'COBAS':
            if self.fields[0] not in self.data.columns:
                # Name unnamed columns to field names: the values sit in
                # 'Value.1', 'Value.3', 'Value.5', ...
                colnames = {'Value.' + str(2 * pos + 1): field
                            for pos, field in enumerate(self.fields)}
            else:
                # The analyte columns already carry their field names.
                colnames = dict(common)
        elif self.type in extras:
            if self.type == 'INFLAM':
                print('Headers for ', self.type)
            colnames = dict(common)
            colnames.update(extras[self.type])

        if colnames is not None:
            self.data = self.data.rename(index=str, columns=colnames)
        print('Colnames: ', self.data.columns.tolist())

        # Insert Row Number column
        if 'R_No.' not in self.data.columns:
            self.data.insert(0, 'R_No.', list(range(len(self.data))))

        # Remove NaT rows
        # NOTE(review): only the first matching row is dropped - confirm
        # that at most one such row is expected.
        nat_rows = self.data.query('A_Date =="NaT"')
        if not nat_rows.empty:
            self.data.drop(nat_rows.index[0], inplace=True)
            print('NaT row dropped')

        # Organize data into subjects
        subjectfield = 'Participant ID'
        if subjectfield not in self.data.columns:
            # One formatted message; two positional args rendered the
            # error text as a tuple.
            raise ValueError(
                'Subject ID field not present: %s' % subjectfield)
        self.data[subjectfield] = self.data[subjectfield].str.replace(" ", "")
        self.sortSubjects(subjectfield)
        if self.subjects is not None:
            print('BloodParser: subjects loaded successfully')
        self.subjectfield = subjectfield
Ejemplo n.º 27
0
 def __init__(self, *args):
     """Initialise the base parser and start with no experiments.

     Arguments are forwarded to ``DataParser.__init__``; ``self.expts``
     starts as an empty dict.
     """
     DataParser.__init__(self, *args)
     self.expts = {}
Ejemplo n.º 28
0
    def __init__(self, **kwargs):
        """Initialise the base parser, compute missing data, sort subjects.

        Keyword arguments are forwarded to ``DataParser.__init__``.
        ``self.data`` is then replaced with the result of
        ``missingData(self.datafile)``.
        """
        DataParser.__init__(self, **kwargs)
        missing = missingData(self.datafile)
        self.data = missing

        self.sortSubjects('Subject')