Example #1
def main():

    # Read options
    with open('options.json', 'r') as inFile:
        options = json.load(inFile)

    # The Jacob method is designed to print the entire sequence of the reference protein
    options['pos_range'] = [1, 566]

    # Import data
    data = importData(options)

    # Import protein of reference
    refProt = reference_retreive(options['refProt'])

    # Get binding core and binding core positions
    coreIdxs, coreClass = getBindingCore(options, refProt)

    # Get PTM positions, type and count
    seqPTM, vaccSample, PTM_count = mapPTM(data, refProt, options)

    # Statistical test
    PTM_stats = statisticalTest(options, seqPTM, vaccSample, refProt)

    # Create HTML output
    map2HTML(options, coreIdxs, coreClass, refProt, PTM_stats, seqPTM,
             vaccSample, PTM_count)
Example #2
def prepare(file_name, image_path, workspace_dir):

    with open(file_name, 'w') as fn:
        for image in sorted(glob.glob(os.path.join(image_path, '*'))):
            exif_data = get_exif_data(Image.open(image))
            lat, lon = get_lat_lon(exif_data)

            alt, roll, yaw, pitch = xmp(image)
            #print lon, lat, alt, yaw, pitch, roll
            st = ",".join([
                os.path.basename(image),
                str(float(lon)), str(float(lat)), str(float(alt)),
                str(float(yaw)), str(float(pitch)), str(float(roll)),
            ]) + "\n"
            fn.write(st)

    all_images, data_matrix = utils.importData(file_name, image_path,
                                               workspace_dir)
    return all_images, data_matrix
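The helpers get_exif_data, get_lat_lon, xmp and utils.importData are defined elsewhere in the project, so the function is not runnable on its own. A minimal usage sketch, assuming a folder of geotagged images; the file names below are placeholders, not taken from the source:

# Hypothetical call to prepare(); every path here is a placeholder.
# prepare() writes one "name,lon,lat,alt,yaw,pitch,roll" line per image
# and then hands the listing to utils.importData.
all_images, data_matrix = prepare('image_list.csv', './images', './workspace')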
Example #3
def main():

    # Read options
    with open('options.json', 'r') as inFile:
        options = json.load(inFile)

    # Import data
    data = importData(options)

    # Import protein of reference
    refProt = reference_retreive(options['refProt'])

    # Map PTMs
    PTM_map, vaccSample = map_PTMs(options, data, refProt)

    # Statistical test
    PTM_stats = statisticalTest(options, PTM_map, vaccSample, refProt)

    # Convert to HTML and store
    map2HTML(PTM_map, refProt, vaccSample, options, PTM_stats)
Example #4
def main():

    # Read options
    with open('options.json', 'r') as inFile:
        options = json.load(inFile)

    # Import data
    data = importData(options)

    # Import protein of reference
    refProt = reference_retreive(options['refProt'])

    # Count PTMs for each unique sequence
    seqPTM, seqCount, seqInit, PTM_count = mapSeqPTM(data, refProt, options)

    # Get binding core and binding core positions
    coreIdxs = getBindingCore(options, refProt)

    # Compute HTML document
    seq2HTML(options, seqPTM, seqCount, seqInit, PTM_count, refProt, coreIdxs)
Example #5
def main():

	# Read options
	with open('options.json','r') as inFile:
		options = json.load(inFile)

	# Import data 
	data = importData(options)

	# Import protein of reference 
	refProt = reference_retreive(options['refProt'])

	# Get binding cores and binding core positions
	coreIdxs, coreClass = getBindingCore(options, refProt)

	# Map mutations
	seqMut, vaccSample = mapMutations(data, refProt, options)

	# Compute Fisher exact test
	MUT_stats = statisticalTest(options, seqMut, vaccSample, refProt)

	# Create HTML output
	map2HTML(options, coreIdxs, coreClass, refProt, MUT_stats, seqMut, vaccSample)
Example #6
import random, pylab, utils

FILENAME = './data/student_survey.csv'

data = utils.importData(FILENAME)
course = data['Course']
handed = data['Handed']
sex = data['Sex']
data['Verbal'] = [
    verbal if verbal != '*' else '0' for verbal in data['Verbal']
]
verbal = [int(verbal) for verbal in data['Verbal']]
data['Age'] = [age if age != '*' else 0 for age in data['Age']]
age = [float(age) for age in data['Age']]

courseSample = random.sample(course, 192)
handedSample = random.sample(handed, 192)
sexSample = random.sample(sex, 192)
verbalSample = random.sample(verbal, 192)
ageSample = random.sample(age, 192)

p_course = utils.buildPivotTable(course)
print(p_course)
s_course = utils.buildPivotTable(courseSample)
print(s_course)
p_handed = utils.buildPivotTable(handed)
print(p_handed)
s_handed = utils.buildPivotTable(handedSample)
print(s_handed)
p_sex = utils.buildPivotTable(sex)
print(p_sex)
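utils.buildPivotTable is a project helper that is not shown in the example; judging from how its result is printed for both the full columns and the 192-item samples, it appears to summarise how often each category occurs. A rough stand-in under that assumption (the name my_pivot and the count/share layout are guesses, not the project's implementation):

from collections import Counter

def my_pivot(values):
    # Tally each distinct value and report it together with its share of the total.
    counts = Counter(values)
    total = len(values)
    return {category: (n, n / total) for category, n in counts.items()}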
Example #7
def main():

    # Read options
    with open('options.json') as inFile:
        options = json.load(inFile)

    # Set pos range to analyze the segment of interest
    options['pos_range'] = [277, 323]

    # Import data
    data = importData(options)

    # Get glycosylation counts, non-glycosylated sites, and non-deamidated 277N
    vaccSample, glycoCount, nonglycoCount, nonglycoSample \
        , nonDeamidCount, nonDeamidSample, nonGlDa, nonGlDaSample = getGlycoAmid(options, data)

    # Compute fisher test
    fisherResult = fisherTest(glycoCount, vaccSample)

    # Print
    print('\n')
    print('Glycosylation Fisher\'s test result:')
    for vacc in fisherResult:
        print(vacc + ' {:.2%} ({})'.format(glycoCount[vacc]/vaccSample[vacc], vaccSample[vacc]) + \
            ' vs. PAN {:.2%} ({}): pvalue = {:.2}, oddsratio = {:.2}'.format( \
            glycoCount['PAN']/vaccSample['PAN'], vaccSample['PAN'], \
            fisherResult[vacc]['pvalue'], fisherResult[vacc]['oddsratio']))

    # Print probabilities of deamidation or glycosylation as separate entities, which can
    # appear in different sequences. This approach does not allow a Fisher's test to be computed.
    print('\n')
    print(
        'SCENARIO A: Considering 277N and glycosylation sites as different entities that can occur in separate segments'
    )
    print('Probability of non-deamidated and non-glycosylated in ARP:' + \
         '{:.2%}'.format((nonglycoCount['ARP']/nonglycoSample['ARP']) * (nonDeamidCount['ARP']/nonDeamidSample['ARP'])))
    print('Probability of non-deamidated and non-glycosylated in FOC:' + \
         '{:.2%}'.format((nonglycoCount['FOC']/nonglycoSample['FOC']) * (nonDeamidCount['FOC']/nonDeamidSample['FOC'])))
    print('Probability of non-deamidated and non-glycosylated in PAN:' + \
         '{:.2%}'.format((nonglycoCount['PAN']/nonglycoSample['PAN']) * (nonDeamidCount['PAN']/nonDeamidSample['PAN'])))

    # Print probabilities of sequences that include both 277N and glycosylation sites.
    # This allows a Fisher's test of PAN vs. ARP and FOC to be computed.
    print('\n')
    print(
        'SCENARIO B: Considering only sequences that include both 277N and glycosylation sites:'
    )
    print('Probability of non-deamidated and non-glycosylated in ARP:' + \
          '{:.2%}'.format((nonGlDa['ARP']/nonGlDaSample['ARP'])))
    print('Probability of non-deamidated and non-glycosylated in FOC:' + \
          '{:.2%}'.format((nonGlDa['FOC']/nonGlDaSample['FOC'])))
    print('Probability of non-deamidated and non-glycosylated in PAN:' + \
          '{:.2%}'.format((nonGlDa['PAN']/nonGlDaSample['PAN'])))

    # Fisher test on SCENARIO B:
    nonGlDaFisher = fisherTest(nonGlDa, nonGlDaSample)

    # Print
    print('\n')
    print('Fisher\'s test result for SCENARIO B:')
    for vacc in nonGlDaFisher:
        print(vacc + ' {:.2%} ({})'.format(nonGlDa[vacc]/nonGlDaSample[vacc], nonGlDaSample[vacc]) + \
            ' vs. PAN {:.2%} ({}): pvalue = {:.2}, oddsratio = {:.2}'.format( \
            nonGlDa['PAN']/nonGlDaSample['PAN'], nonGlDaSample['PAN'], \
            nonGlDaFisher[vacc]['pvalue'], nonGlDaFisher[vacc]['oddsratio']))
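fisherTest is another helper whose body is not included in the example; since it is called with a count dictionary and a sample-size dictionary and returns a pvalue and an oddsratio for each vaccine compared against PAN, it presumably runs a 2x2 Fisher's exact test. A sketch of a comparable computation with scipy.stats.fisher_exact, offered as an assumption rather than the author's implementation:

from scipy.stats import fisher_exact

def fisher_vs_pan(counts, samples):
    # Compare each vaccine's event rate (counts out of samples) against PAN
    # using a 2x2 Fisher's exact test.
    results = {}
    for vacc in counts:
        if vacc == 'PAN':
            continue
        table = [[counts[vacc], samples[vacc] - counts[vacc]],
                 [counts['PAN'], samples['PAN'] - counts['PAN']]]
        oddsratio, pvalue = fisher_exact(table)
        results[vacc] = {'pvalue': pvalue, 'oddsratio': oddsratio}
    return results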
Example #8
def start():
    begin = msg('Start processing files')

    # Get the various parameters
    configFile = open('./config.json', 'r', encoding='utf-8')
    config = json.loads(configFile.read())
    table = config['table']
    source = config['source']
    data = config['files']['data']
    include = config['files']['include']
    exclude = config['files']['exclude']

    # Connect to the database
    connect = mysql.connector.connect(**config['mysql'])

    # Drop the old table and create a new one
    cursor = connect.cursor()
    cursor.execute('DROP TABLE IF EXISTS {}'.format(table))
    sql = '''CREATE TABLE {} (
        `id` int(11) NOT NULL AUTO_INCREMENT,
        `author` text COLLATE utf8mb4_unicode_ci DEFAULT NULL,
        `dynasty` text COLLATE utf8mb4_unicode_ci NOT NULL,
        `title` text COLLATE utf8mb4_unicode_ci DEFAULT NULL,
        `rhythmic` text COLLATE utf8mb4_unicode_ci DEFAULT NULL,
        `chapter` text COLLATE utf8mb4_unicode_ci DEFAULT NULL,
        `paragraphs` text COLLATE utf8mb4_unicode_ci NOT NULL,
        `notes` text COLLATE utf8mb4_unicode_ci DEFAULT NULL,
        `collection` text COLLATE utf8mb4_unicode_ci NOT NULL,
        `section` text COLLATE utf8mb4_unicode_ci DEFAULT NULL,
        `content` text COLLATE utf8mb4_unicode_ci DEFAULT NULL,
        `comment` text COLLATE utf8mb4_unicode_ci DEFAULT NULL,
        `tags` text COLLATE utf8mb4_unicode_ci DEFAULT NULL,
        PRIMARY KEY (`id`)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;'''
    cursor.execute(sql.format(table))

    # Process the JSON files in a loop
    arr = []
    l = 0
    total = 0
    for d in data:
        if len(include) and d['collection'] not in include:
            continue
        if len(exclude) and d['collection'] in exclude:
            continue
        res = importData(connect, source, table, d['folder'], d['pattern'],
                         d['dynasty'], d['collection'])
        l = max(l, len(d['collection']))
        if res['count'] is None:
            arr.append({
                'collection': d['collection'],
                'time': res['time'],
                'count': 'failed'
            })
        else:
            arr.append({
                'collection': d['collection'],
                'time': res['time'],
                'count': res['count']
            })
            total += res['count']
    cursor.close()
    connect.close()

    # Finally, print the summary statistics
    end = msg('All files processed, total records: ' + str(total))
    msg()
    for v in arr:
        count = v['count']
        msg('{}  elapsed  {}  {}'.format(
            v['collection'].ljust(l + l - len(v['collection'])), v['time'],
            count))
    msg('Total time  ' + getTimeString(begin, end))
    msg()
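start() reads everything it needs from ./config.json: table, source, files.data (each entry carrying collection, folder, pattern and dynasty), files.include, files.exclude, and a mysql block that is passed straight to mysql.connector.connect. A skeletal config along those lines, written out from Python; every value is a placeholder, not taken from the original project:

import json

# All values below are placeholders illustrating the keys start() expects.
config = {
    'table': 'poems',
    'source': './source',
    'files': {
        'data': [
            {'collection': 'shijing', 'folder': 'shijing', 'pattern': '*.json',
             'dynasty': 'Pre-Qin'},
        ],
        'include': [],  # empty list: no include filter
        'exclude': [],  # empty list: no exclude filter
    },
    'mysql': {'host': 'localhost', 'user': 'root', 'password': '',
              'database': 'poems'},
}

with open('./config.json', 'w', encoding='utf-8') as f:
    json.dump(config, f, ensure_ascii=False, indent=2)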
Example #9
                            x, y = list(getBatches(X_val, len(X_val), self.n_features))[0]
                            feed_dict = {self.X: x, self.Y_label: y, self.L2_param: L2_parameter}
                            summary = sess.run(summary_merge, feed_dict=feed_dict)
                            val_writer.add_summary(summary, global_step)
                        print(global_step)
                        global_step += 1
                        b += 1

    # Define inference/prediction routine

# Run the script
DEBUGGING = False
if __name__ == "__main__":
    if DEBUGGING:
        # Import raw data
        df = importData(DATA_PATH)
        # Convert data into usable format by tokenizing
        formatAllMessages(df)
        word_freq = wordFrequency(df)
        word_map = makeWordMappingFromFreqList(word_freq, drop_below=2)
        tokenizeMessages(word_map, df)
        # Split into train, validation, and test sets
        spam = df[df["y"] == 1]
        ham = df[df["y"] == 0]
        print(len(spam), "+", len(ham), "=", len(spam)+len(ham))
        X_train = balanceDataset(spam.iloc[100:], ham.iloc[100:])
    else:
        # Import raw data
        df = importData(DATA_PATH)
        # Convert data into usable format by tokenizing
        formatAllMessages(df)
Example #10
if not os.path.exists(tmpDir):
    os.makedirs(tmpDir)
if not os.path.exists(dataDir):
    os.makedirs(dataDir)

mast = MAST('kepler/data_search')

kicIds = mast.search({
    'max_records': 3000,  # same amount of data without exoplanets
    'sci_data_quarter': 0,  # avoid duplicate ids
})[2:, 0]

## Use this to retrieve published/confirmed planets
# mast.setDataSet("kepler/published_planets")

# confirmedPlanetKicIds = np.unique(mast.search({
#     "max_records": 200
# })[2:,1])

for i in range(len(kicIds)):
    print("%i/%i" % (i + 1, len(kicIds)))
    kicId = str(kicIds[i]).zfill(9)

    if dataExists(kicId):
        continue

    utils.importData(tmpDir, kicId)
    processData(kicId, 0)
    deleteTempFiles(kicId)

print("Data import finished\n")
Example #11
        return stree[u'class']

    def getRandomForestClass(self, data):
        sureclass = {}

        if self.RootNode is None:
            raise ValueError('There is no Random Forest!')
        else:
            for key, value in self.RootNode.items():
                tclass = self.getClass(value, data)
                sureclass[tclass] = sureclass.get(tclass, 0) + 1

        ans = max(sureclass.items(), key=lambda x: x[1])

        return ans[0]


if __name__ == '__main__':
    dataset = utils.importData(sys.path[0] + '/data/wine.txt')

    rf = randomforest(dataset=dataset, treeNum=10)

    rf.getRandomForest(name='test1')
    print(rf.RootNode)

    rf.saveTree('test1')

    rf.readTree(sys.path[0] + '/tree_json/test1.txt')

    print(rf.RootNode)
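The __main__ block only builds, saves and reloads the forest. Classifying a record goes through getRandomForestClass, which asks every tree for a class via getClass and returns the majority vote. A hedged usage line; the shape of a single record depends on how utils.importData formats wine.txt, so dataset[0] is only a guess:

    # Hypothetical classification call; 'sample' must be one record in whatever
    # format utils.importData produces for wine.txt.
    sample = dataset[0]
    print(rf.getRandomForestClass(sample))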