예제 #1
0
파일: algorithms.py 프로젝트: gaow/SEQPower
 def __result_parser(self, result):
     '''Flatten a list of per-test result dictionaries into one summary.

     Input example:
     [{'num_variants_CFisher': 3, 'pvalue_CFisher': 0.0615, 'total_mac_CFisher': 4, 'sample_size_CFisher': 400, 'statistic_CFisher': 1.79e+308}, {'statistic_WSSRankTest': 1784.0, 'total_mac_WSSRankTest': 4, 'pvalue_WSSRankTest': 0.0277, 'num_variants_WSSRankTest': 3, 'sample_size_WSSRankTest': 400}]

     Output: a dictionary holding
       * '<stat>_analyzed' for each of 'num_variants', 'total_mac' and
         'sample_size' (the first matching value encountered is kept), and
       * 'power_<name>' for every 'pvalue_<name>' key, where a repeated
         <name> gets a numeric suffix ('_1', '_2', ...) to stay unique.

     Raises NullResultException when ``result`` is empty or when is_null()
     flags every collected value.
     '''
     if not len(result):
         raise NullResultException
     summary = {}
     seen_methods = []
     stat_prefixes = ('num_variants', 'total_mac', 'sample_size')
     for record in result:
         for field in record:
             # data statistics: only the first occurrence is recorded
             for prefix in stat_prefixes:
                 analyzed_key = prefix + '_analyzed'
                 if field.startswith(prefix) and analyzed_key not in summary:
                     summary[analyzed_key] = record[field]
             # everything below concerns p-values only
             if not field.startswith('pvalue_'):
                 continue
             method = field[len('pvalue_'):]
             if method in seen_methods:
                 # name already taken: find the first free numeric suffix
                 suffix = 1
                 while '{}_{}'.format(method, suffix) in seen_methods:
                     suffix += 1
                 method = '{}_{}'.format(method, suffix)
             seen_methods.append(method)
             summary['power_' + method] = record[field]
     null_flags = [is_null(summary[key]) for key in summary]
     if null_flags.count(True) == len(summary):
         raise NullResultException
     return summary
예제 #2
0
 def __result_parser(self, result):
     '''Collect statistics and p-values from a list of result dictionaries.

     Input example:
     [{'num_variants_CFisher': 3, 'pvalue_CFisher': 0.0615, 'total_mac_CFisher': 4, 'sample_size_CFisher': 400, 'statistic_CFisher': 1.79e+308}, {'statistic_WSSRankTest': 1784.0, 'total_mac_WSSRankTest': 4, 'pvalue_WSSRankTest': 0.0277, 'num_variants_WSSRankTest': 3, 'sample_size_WSSRankTest': 400}]

     Returns a dictionary with 'num_variants_analyzed', 'total_mac_analyzed'
     and 'sample_size_analyzed' (first value seen for each) plus one
     'power_<name>' entry per 'pvalue_<name>' key. Duplicate names are
     disambiguated with '_1', '_2', ... suffixes.

     Raises NullResultException for an empty ``result`` or when every
     collected value is reported null by is_null().
     '''
     if len(result) < 1:
         raise NullResultException
     collected = {}
     taken = []
     for entry_dict in result:
         for key in entry_dict:
             # record statistics (first hit wins)
             for stat in ['num_variants', 'total_mac', 'sample_size']:
                 if key.startswith(stat):
                     collected.setdefault('{}_analyzed'.format(stat),
                                          entry_dict[key])
             # record p-values
             if key.startswith('pvalue_'):
                 label = key[7:]
                 # resolve name conflict by appending the first unused index
                 candidate = label
                 index = 0
                 while candidate in taken:
                     index += 1
                     candidate = '{}_{}'.format(label, index)
                 taken.append(candidate)
                 collected['power_{}'.format(candidate)] = entry_dict[key]
     flags = [is_null(collected[name]) for name in collected]
     if flags.count(True) == len(flags):
         raise NullResultException
     return collected
예제 #3
0
파일: manager.py 프로젝트: gaow/SEQPower
 def preprocess(self, data, exclude=None, hide=None):
     """Reshape a result dictionary into table columns for output.

     ``data`` is modified in place and is also returned.

     Processing steps:
       1. Every key except "pool" is inspected: ``L.RunningStat`` values are
          replaced by their ``mean()`` and gain ``<key>_median`` (from
          ``left()``) and ``<key>_std`` (from ``sd()``) companions; values
          flagged by ``is_null`` are deleted; keys listed in ``hide`` are
          renamed with a leading underscore; remaining list values are
          remembered as multi-row columns.
       2. If any key starts with "power", the individual ``power_<method>``
          entries are merged into the list columns "power", "method",
          "power_std" and "power_median".
       3. All other single-valued columns are repeated so they are as long
          as the "power" column (at least one element).
       4. Column names are ordered.

     Args:
         data: dictionary of results (mutated).
         exclude: keys to leave out of the returned column names
             (default: none).
         hide: keys to rename to ``"_" + key`` (default: none).

     Returns:
         A tuple ``(data, single_row_colnames, multi_row_colnames)``.
     """
     # None stands in for the former mutable ``[]`` defaults
     exclude = [] if exclude is None else exclude
     hide = [] if hide is None else hide
     multicols = []
     for key in list(data.keys()):
         if key in ["pool"]:
             continue
         # collect results from RunningStat
         if isinstance(data[key], L.RunningStat):
             data[key], data[key + "_median"], data[key + "_std"] = (
                 data[key].mean(),
                 data[key].left(),
                 data[key].sd(),
             )
             continue
         # delete trivial data
         if is_null(data[key]):
             del data[key]
             continue
         # adjust key names
         if key in hide:
             data["_" + key] = data[key]
             del data[key]
             continue
         # multi column information
         if type(data[key]) is list:
             multicols.append(key)
     if any(x.startswith("power") for x in data):
         # manually combine multiple power analysis methods into single column
         for key in ["power", "method"]:
             data[key] = []
         for key in list(data.keys()):
             if key.startswith("power") and key not in ["power", "power_std", "power_median"]:
                 if key.endswith("_std"):
                     if "power_std" not in data:
                         data["power_std"] = []
                     if "default" in key:
                         data["power_std"].append(data[key])
                     else:
                         # adjust standard error: divide by sqrt of the
                         # "_replicates" entry (presumably present because
                         # the caller hides "replicates" -- confirm)
                         data["power_std"].append(data[key] / np.sqrt(data["_replicates"]))
                 elif key.endswith("_median"):
                     if "power_median" not in data:
                         data["power_median"] = []
                     data["power_median"].append(data[key])
                 else:
                     data["power"].append(data[key])
                     # anchored so only the leading "power_" is stripped and
                     # a method name containing "power_" stays intact
                     data["method"].append(re.sub("^power_", "", key))
                 del data[key]
     # expand table
     for key in list(data.keys()):
         if key not in ["power", "power_std", "power_median", "method", "model"] + multicols:
             data[key] = [data[key]] * (max(len(data["power"]), 1) if "power" in data else 1)
     # manually create ordered column names; the substitutions in the sort
     # key place "method" and "power" names ahead of lowercase names
     colnames = ["title", "name"] + sorted(
         [x for x in list(data.keys()) if x not in exclude],
         key=lambda x: x.replace("_", "|").replace("method", "AAA").replace("power", "AAB"),
     )
     # return: data, single row colnames, multi row colnames
     return data, [x for x in colnames if x not in multicols], [x for x in colnames if x in multicols]
예제 #4
0
 def preprocess(self, data, exclude = None, hide = None):
     '''Reshape a result dictionary into table columns for output.

     ``data`` is modified in place and is also returned.

     Processing steps:
       1. Every key except 'pool' is inspected: ``L.RunningStat`` values are
          replaced by their ``mean()`` and gain ``<key>_median`` (from
          ``left()``) and ``<key>_std`` (from ``sd()``) companions; values
          flagged by ``is_null`` are deleted; keys listed in ``hide`` are
          renamed with a leading underscore; remaining list values are
          remembered as multi-row columns.
       2. If any key starts with 'power', the individual ``power_<method>``
          entries are merged into the list columns 'power', 'method',
          'power_std' and 'power_median'.
       3. All other single-valued columns are repeated so they are as long
          as the 'power' column (at least one element).
       4. Column names are ordered.

     Parameters:
         data: dictionary of results (mutated).
         exclude: keys to leave out of the returned column names
             (default: none).
         hide: keys to rename to ``"_" + key`` (default: none).

     Returns a tuple ``(data, single_row_colnames, multi_row_colnames)``.
     '''
     # None stands in for the former mutable ``[]`` defaults
     if exclude is None:
         exclude = []
     if hide is None:
         hide = []
     multicols = []
     for key in list(data.keys()):
         if key in ['pool']:
             continue
         # collect results from RunningStat
         if isinstance(data[key], L.RunningStat):
             data[key], data[key + '_median'], data[key + '_std'] = \
                 data[key].mean(), data[key].left(), data[key].sd()
             continue
         # delete trivial data
         if is_null(data[key]):
             del data[key]
             continue
         # adjust key names
         if key in hide:
             data["_" + key] = data[key]
             del data[key]
             continue
         # multi column information
         if type(data[key]) is list:
             multicols.append(key)
     if any(x.startswith('power') for x in data):
         # manually combine multiple power analysis methods into single column
         for key in ['power', 'method']:
             data[key] = []
         for key in list(data.keys()):
             if key.startswith('power') and key not in ['power', 'power_std', 'power_median']:
                 if key.endswith('_std'):
                     if 'power_std' not in data:
                         data['power_std'] = []
                     if 'default' in key:
                         data['power_std'].append(data[key])
                     else:
                         # adjust standard error: divide by sqrt of the
                         # '_replicates' entry (presumably present because
                         # the caller hides 'replicates' -- confirm)
                         data['power_std'].append(data[key]/np.sqrt(data['_replicates']))
                 elif key.endswith('_median'):
                     if 'power_median' not in data:
                         data['power_median'] = []
                     data['power_median'].append(data[key])
                 else:
                     data['power'].append(data[key])
                     # anchored so only the leading 'power_' is stripped and
                     # a method name containing 'power_' stays intact
                     data['method'].append(re.sub('^power_', '', key))
                 del data[key]
     # expand table
     for key in list(data.keys()):
         if key not in ['power', 'power_std', 'power_median', 'method', 'model'] + multicols:
             data[key] = [data[key]] * (max(len(data['power']), 1) if 'power' in data else 1)
     # manually create ordered column names; the substitutions in the sort
     # key place 'method' and 'power' names ahead of lowercase names
     colnames = ['title', 'name'] + sorted([x for x in list(data.keys()) if x not in exclude],
                                           key = lambda x: x.replace("_", "|").replace('method', 'AAA').replace('power', 'AAB'))
     # return: data, single row colnames, multi row colnames
     return data, [x for x in colnames if x not in multicols], [x for x in colnames if x in multicols]
예제 #5
0
 def __get_type(self, values):
     '''Label each entry of ``values`` as 'number' or 'string'.

     An entry is a 'number' when ``float()`` accepts it, otherwise a
     'string'. For a list entry, its first element not flagged by
     ``is_null`` is tested in place of the list; this assumes all elements
     of such a list share one type, which holds for simulation data. A list
     with no such element is tested as-is and therefore labelled 'string'.

     Returns a list of labels, one per entry, in the same order.
     '''
     types = []
     for item in values:
         if isinstance(item, list):
             # pick a representative element; fall back to the list itself
             item = next((elem for elem in item if not is_null(elem)), item)
         try:
             float(item)
         except (TypeError, ValueError, OverflowError):
             # only conversion failures mean "not a number"; interrupts
             # and exit requests are no longer swallowed
             types.append('string')
         else:
             types.append('number')
     return types
예제 #6
0
 def __get_type(self, values):
     '''Classify every entry of ``values`` as 'number' or 'string'.

     The label is 'number' if ``float()`` can convert the entry and
     'string' otherwise. When an entry is a list, the first element that
     ``is_null`` does not flag is used as its representative (the list is
     assumed to be homogeneous in type, which is true for simulation data);
     if there is no such element the list itself is tested, which yields
     'string'.

     Returns the labels as a list aligned with ``values``.
     '''
     types = []
     for item in values:
         probe = item
         if isinstance(item, list):
             for candidate in item:
                 if not is_null(candidate):
                     probe = candidate
                     break
         try:
             float(probe)
         except (TypeError, ValueError, OverflowError):
             # narrowed from a bare except so KeyboardInterrupt and
             # SystemExit propagate instead of being read as 'string'
             label = 'string'
         else:
             label = 'number'
         types.append(label)
     return types
예제 #7
0
 def preprocess(self, data, exclude=None, hide=None):
     '''Reshape a result dictionary into table columns for output.

     ``data`` is modified in place and is also returned.

     Processing steps:
       1. Every key except 'pool' is inspected: ``L.RunningStat`` values are
          replaced by their ``mean()`` and gain ``<key>_median`` (from
          ``left()``) and ``<key>_std`` (from ``sd()``) companions; values
          flagged by ``is_null`` are deleted; keys listed in ``hide`` are
          renamed with a leading underscore; remaining list values are
          remembered as multi-row columns.
       2. If any key starts with 'power', the individual ``power_<method>``
          entries are merged into the list columns 'power', 'method',
          'power_std' and 'power_median'.
       3. All other single-valued columns are repeated so they are as long
          as the 'power' column (at least one element).
       4. Column names are ordered.

     Parameters:
         data: dictionary of results (mutated).
         exclude: keys to leave out of the returned column names
             (default: none).
         hide: keys to rename to ``"_" + key`` (default: none).

     Returns a tuple ``(data, single_row_colnames, multi_row_colnames)``.
     '''
     # None stands in for the former mutable ``[]`` defaults
     exclude = [] if exclude is None else exclude
     hide = [] if hide is None else hide
     multicols = []
     for key in list(data.keys()):
         if key in ['pool']:
             continue
         # collect results from RunningStat
         if isinstance(data[key], L.RunningStat):
             data[key], data[key + '_median'], data[key + '_std'] = \
                 data[key].mean(), data[key].left(), data[key].sd()
             continue
         # delete trivial data
         if is_null(data[key]):
             del data[key]
             continue
         # adjust key names
         if key in hide:
             data["_" + key] = data[key]
             del data[key]
             continue
         # multi column information
         if type(data[key]) is list:
             multicols.append(key)
     if any(x.startswith('power') for x in data):
         # manually combine multiple power analysis methods into single column
         for key in ['power', 'method']:
             data[key] = []
         for key in list(data.keys()):
             if key.startswith('power') and key not in [
                     'power', 'power_std', 'power_median'
             ]:
                 if key.endswith('_std'):
                     if 'power_std' not in data:
                         data['power_std'] = []
                     if 'default' in key:
                         data['power_std'].append(data[key])
                     else:
                         # adjust standard error: divide by sqrt of the
                         # '_replicates' entry (presumably present because
                         # the caller hides 'replicates' -- confirm)
                         data['power_std'].append(
                             data[key] / np.sqrt(data['_replicates']))
                 elif key.endswith('_median'):
                     if 'power_median' not in data:
                         data['power_median'] = []
                     data['power_median'].append(data[key])
                 else:
                     data['power'].append(data[key])
                     # anchored so only the leading 'power_' is stripped and
                     # a method name containing 'power_' stays intact
                     data['method'].append(re.sub('^power_', '', key))
                 del data[key]
     # expand table
     for key in list(data.keys()):
         if key not in [
                 'power', 'power_std', 'power_median', 'method', 'model'
         ] + multicols:
             data[key] = [data[key]] * (max(len(data['power']), 1)
                                        if 'power' in data else 1)
     # manually create ordered column names; the substitutions in the sort
     # key place 'method' and 'power' names ahead of lowercase names
     colnames = ['title', 'name'] + sorted(
         [x for x in list(data.keys()) if x not in exclude],
         key=lambda x: x.replace("_", "|").replace('method', 'AAA').replace(
             'power', 'AAB'))
     # return: data, single row colnames, multi row colnames
     return data, [x for x in colnames if x not in multicols
                   ], [x for x in colnames if x in multicols]