def genSpecDistribute():
    """
    Group the model files by spectral type and copy three samples of them.

    Every path returned by util.listDirFiles(sourceDir) is filed under the
    first element of getSpecType(path). modelMatchFileCopy is then run once
    per type for each of three targets:

    * modelDir_avg  - int(modelCount / 7) files for every type;
    * modelDir_dis  - a share of modelCount proportional to dic_dis[type],
      never less than 1 when the share truncates to 0;
    * targetFitsDir - a fixed 10 files for every type.

    Reads the module-level names sourceDir, modelCount, dic_dis,
    modelDir_avg, modelDir_dis and targetFitsDir.
    """
    modelsByType = {}
    for path in util.listDirFiles(sourceDir):
        util.addItemToDic(modelsByType, getSpecType(path)[0], path)

    # Batch 1: the same quota for every type.
    evenQuota = int(modelCount / 7)
    for specKey in modelsByType.keys():
        modelMatchFileCopy(modelsByType, specKey, evenQuota, modelDir_avg)

    # Batch 2: quota weighted by the distribution table.
    weightTotal = sum(dic_dis.values())
    for specKey in modelsByType.keys():
        # "or 1" keeps at least one file when the share truncates to zero.
        weightedQuota = int(float(dic_dis[specKey]) / weightTotal * modelCount) or 1
        modelMatchFileCopy(modelsByType, specKey, weightedQuota, modelDir_dis)

    # Batch 3: fixed-size sample.
    for specKey in modelsByType.keys():
        modelMatchFileCopy(modelsByType, specKey, 10, targetFitsDir)
def genUpdateSQL(tableName, updateCols, paramNum, whereSQL):
    """
    Build an update statement template holding paramNum "%s" placeholders.

    The placeholders are joined with util.listToString(..., ","). When
    whereSQL is None or the empty string no where clause is emitted;
    otherwise " where <whereSQL>" is appended.

    NOTE(review): the emitted "update T set(cols) VALUES (...)" shape is not
    the usual "UPDATE T SET col = value" form -- confirm the target database
    accepts it before relying on this helper.
    """
    placeholders = util.listToString(["%s" for _ in range(paramNum)], ",")
    hasFilter = (whereSQL is not None) and (whereSQL != "")
    if hasFilter:
        return "update %s set(%s) VALUES (%s) where %s" % (
            tableName, updateCols, placeholders, whereSQL)
    return "update %s set(%s) VALUES (%s)" % (tableName, updateCols, placeholders)
def getCoordinateMap(data):
    """
    Build the ra -> dec dictionary for a sequence of rows.

    Column 0 of each row is read as ra and column 1 as dec; both are
    converted with int() and stored through util.addItemToDic.
    """
    raToDec = {}
    for record in data:
        raKey, decValue = int(record[0]), int(record[1])
        util.addItemToDic(raToDec, raKey, decValue)
    return raToDec
def getCoordinateCountMap(data):
    """
    Build the two-level ra -> dec -> count dictionary for a sequence of rows.

    Columns 0, 1 and 2 of each row are read as ra, dec and count; all three
    are converted with int() and stored through util.setItemToDic2.
    """
    countByCell = {}
    for record in data:
        raKey, decKey, cellCount = int(record[0]), int(record[1]), int(record[2])
        util.setItemToDic2(countByCell, raKey, decKey, cellCount)
    return countByCell
def imageExtract(filePath, waveLens, isShow=True, scale=15):
    """
    Plot the flux of one FITS file against wavelength and mark given positions.

    Header keywords and flux come from getFitsHeaderAndFlux, the wavelength
    axis from calcWaveWithKeywords. Every entry of waveLens is used as an
    index into both the wave and flux arrays: a dashed red vertical line is
    drawn at wave[wl] and, when checkLine(flux, wl, scale) returns 1, a "[Y]"
    label is placed 100 flux units below the curve at that position.

    :param filePath: path of the FITS file; also the stem of the saved image.
    :param waveLens: iterable of indices into the wave/flux arrays.
    :param isShow: True shows the figure, False saves it as filePath + '.png'.
    :param scale: passed unchanged to checkLine.
    """
    keywords, flux = getFitsHeaderAndFlux(filePath)
    wave = calcWaveWithKeywords(keywords)
    fileName = util.getFileNameByPath(filePath, True)
    try:
        plt.plot(wave, flux)
        plt.xlabel('wavelength')
        plt.ylabel('flux')
        plt.title(fileName + ":" + getKeyword(keywords, "SUBCLASS"))
        plt.axis(ymin=0)
        # The y axis reaches at least 1500 and otherwise 100 units above the peak.
        plt.axis(ymax=max(1500, max(flux) + 100))
        for wl in waveLens:
            plt.axvline(wave[wl], color='r', ls='--')
            if checkLine(flux, wl, scale) == 1:
                plt.text(wave[wl], flux[wl] - 100, '[Y]', color='r',
                         horizontalalignment='center',
                         verticalalignment='center', fontsize=10)
        if isShow:
            plt.show()
        else:
            plt.savefig(filePath + '.png')
    finally:
        # Release the figure even when drawing or saving fails; otherwise the
        # next call would keep plotting onto this half-finished figure.
        plt.close()
def getInputData(prefixes, table, sqlFilter):
    """
    Query the input table, dump the rows to text files and return them as matrices.

    The selected fields are those from genMagByPrefix(prefixes) followed by
    `type` and mainclass. For every fetched row the last column is treated as
    the tag and all preceding columns as the record. Records are written to
    "data_input.txt" (one util.listToString(record) per line) and tags to
    "data_input_tag.txt" (one per line); the row count is printed at the end.

    :param prefixes: passed to genMagByPrefix to build the leading fields.
    :param table: table expression passed to the select builder.
    :param sqlFilter: filter passed to the select builder.
    :return: (np.mat of the records, np.mat of the tags transposed).
    """
    fieldList = genMagByPrefix(prefixes)
    fieldList.append("`type`")
    fieldList.append("mainclass")
    sql_select = sqlHelper.genSelectSQLWithFieldList(fieldList, table, sqlFilter)
    outFile = "data_input.txt"
    outFile_tag = "data_input_tag.txt"
    res = []
    tags = []
    # Context managers close both files even if the query or a write raises.
    with open(outFile, "w") as output:
        with open(outFile_tag, "w") as output_tag:
            for item in sqlHelper.getFetchAll(sql_select):
                lastCol = len(item) - 1
                record = [item[i] for i in range(lastCol)]
                tag = item[lastCol]
                res.append(record)
                tags.append(tag)
                output.write(util.listToString(record))
                output.write("\n")
                output_tag.write(tag)
                output_tag.write("\n")
    print("input data count : %s" % len(res))
    return np.mat(res), np.mat(tags).T
def fitsToDatabase(self, strategy):
    """
    Feed every ".fits" file under strategy.dirPath to the given strategy.

    strategy.preHandle() runs once first. Each file is then read with
    astroUtil.getFitsInfo and handed to strategy.handle; a failure on one
    file is reported through exUtil.printTagMessage and the loop continues
    with the next file. self is not used.
    """
    strategy.preHandle()
    for fitsPath in util.listDirFiles(strategy.dirPath, ".fits"):
        try:
            strategy.handle(astroUtil.getFitsInfo(fitsPath))
        except Exception as err:
            exUtil.printTagMessage(err, "Fits handle error", fitsPath)
def getSpecType(filePath):
    """
    Look up the spectral type encoded in a model file name.

    The text between the first "t" and the first "g" of the file name is
    read as an integer teff. The two characters after that "g", divided by
    10, select the lookup column: below 1 -> "teff_sg", below 3 -> "teff_g",
    anything else -> "teff_m".
    NOTE(review): presumably sg / g / m stand for supergiant / giant /
    main sequence -- confirm against DataAccessUtil.

    :return: whatever DataAccessUtil.getSpecTypeByTeff(teff, column) returns.
    """
    name = util.getFileNameByPath(filePath)
    gPos = name.index("g")
    tPos = name.index("t")
    teff = int(name[tPos + 1:gPos])
    gravity = float(name[gPos + 1:gPos + 3]) / 10
    if gravity < 1:
        column = "teff_sg"
    elif gravity < 3:
        column = "teff_g"
    else:
        column = "teff_m"
    return DataAccessUtil.getSpecTypeByTeff(teff, column)
def calcMinChi22(flux_target, templateDir, waveRange):
    """
    Find the template in templateDir whose flux has the smallest chi2 to the target.

    Each file is loaded with np.loadtxt; its first two columns are passed to
    formatWaveAndFlux together with waveRange, and the resulting flux is
    compared with mathUtil.chi2(flux_target, flux).

    :param flux_target: target flux handed to mathUtil.chi2.
    :param templateDir: directory listed with util.listDirFiles.
    :param waveRange: passed unchanged to formatWaveAndFlux.
    :return: (templatePath, minChi2); ("", 0) when the directory yields no file.
    """
    templatePath = ""
    # None, not 0, marks "nothing compared yet": a genuine chi2 of 0 is the
    # best possible match and must not be overwritten by a later template.
    minChi2 = None
    for path in util.listDirFiles(templateDir):
        data = np.loadtxt(path)
        _, flux = formatWaveAndFlux(data[:, 0], data[:, 1], waveRange)
        chi2_value = mathUtil.chi2(flux_target, flux)
        if minChi2 is None or chi2_value < minChi2:
            minChi2 = chi2_value
            templatePath = path
    if minChi2 is None:
        # Keep the historical result for an empty template directory.
        return "", 0
    return templatePath, minChi2
def plotOver(lamostTable, sdssTable, mu):
    """
    Draw the LAMOST / SDSS coverage map and save it as a PNG.

    Both tables are turned into ra -> dec -> count dictionaries with
    getCoordinateDic. Every (ra, dec) cell is drawn as a 1 x 1 rectangle:

    * red   - the cell exists only in the LAMOST dictionary;
    * blue  - the cell exists only in the SDSS dictionary;
    * black - the cell exists in both; its alpha is
      (lamost count / sdss count) * mu, capped at 1.

    :param lamostTable: passed to getCoordinateDic for the LAMOST counts.
    :param sdssTable: passed to getCoordinateDic for the SDSS counts.
    :param mu: scale factor applied to the count ratio; also part of the
        output file name.
    """
    # Unique markers for one-sided cells. Numeric markers would collide with
    # a real count ratio of the same value and miscolour that cell.
    lamostOnly = object()
    sdssOnly = object()

    dic_lamost = getCoordinateDic(lamostTable)
    dic_sdss = getCoordinateDic(sdssTable)
    dic_over = {}
    for ra in dic_lamost.keys():
        for dec in dic_lamost[ra].keys():
            lcount = dic_lamost[ra][dec]
            scount = util.getDic2Value(dic_sdss, ra, dec)
            if scount is None:
                util.setItemToDic2(dic_over, ra, dec, lamostOnly)
            else:
                util.setItemToDic2(dic_over, ra, dec, float(lcount) / float(scount))
    for ra in dic_sdss.keys():
        for dec in dic_sdss[ra].keys():
            if not util.isInDic2(dic_lamost, ra, dec):
                util.setItemToDic2(dic_over, ra, dec, sdssOnly)

    for ra in dic_over.keys():
        for dec in dic_over[ra].keys():
            alpha = dic_over[ra][dec]
            if alpha is lamostOnly:
                plotUtil.plotRectangleWithParam(plt, ra, dec, 1, 1, "red")
            elif alpha is sdssOnly:
                plotUtil.plotRectangleWithParam(plt, ra, dec, 1, 1, "blue")
            else:
                # Alpha is an opacity, so the scaled ratio is capped at 1.
                plotUtil.plotRectangleWithParam(plt, ra, dec, 1, 1, "black",
                                                min(alpha * mu, 1))
    plt.title("Lamost/SDSS Covered")
    CommonPlot.plotGrid(plt)
    CommonPlot.plotGalaxy(plt)
    plt.savefig('/media/zdwdong/my/fits/Cover/Lamost_OVER_SDSS_' + str(mu) + '.png')
    plt.close()
def calcMinChi2(flux_target, templateDir, waveRange):
    """
    Find the template with the smallest polynomial-corrected chi2 to the target.

    Each file in templateDir is loaded with np.loadtxt and its first two
    columns are passed to formatWaveAndFlux together with waveRange. Three
    coefficients are fitted with mathUtil.calcChi2Coeff and the chi2 is
    computed with mathUtil.chi2_ploy. A template whose coefficient fit raises
    is reported through exUtil.printTagMessage and skipped.

    :param flux_target: target flux handed to the mathUtil helpers.
    :param templateDir: directory listed with util.listDirFiles.
    :param waveRange: passed unchanged to formatWaveAndFlux.
    :return: (templatePath, minChi2, minCoeff); ("", 0, None) when no template
        could be evaluated. The visible original fell off the end of the
        function and discarded these values.
    """
    templatePath = ""
    # None, not 0, marks "nothing compared yet": a genuine chi2 of 0 is the
    # best possible match and must not be overwritten by a later template.
    minChi2 = None
    minCoeff = None
    for path in util.listDirFiles(templateDir):
        data = np.loadtxt(path)
        wave, flux = formatWaveAndFlux(data[:, 0], data[:, 1], waveRange)
        try:
            coeff = mathUtil.calcChi2Coeff(flux_target, flux, wave)
        except Exception as ex:
            exUtil.printTagMessage(ex, "error", path)
            continue
        chi2_value = mathUtil.chi2_ploy(wave, flux_target, flux,
                                        coeff[0], coeff[1], coeff[2])
        if minChi2 is None or chi2_value < minChi2:
            minChi2 = chi2_value
            templatePath = path
            minCoeff = coeff
    if minChi2 is None:
        # Same "nothing found" value that calcMinChi22 reports.
        minChi2 = 0
    return templatePath, minChi2, minCoeff
def genSelectSQLWithFieldList(fieldList, fromSQL, whereSQL):
    """
    Build a select statement from a list of field names.

    The list is joined with util.listToString(fieldList, ",") and handed,
    together with fromSQL and whereSQL, to genSelectSQL.
    """
    return genSelectSQL(util.listToString(fieldList, ","), fromSQL, whereSQL)
def modelMatchFileCopy(dic, key, count, targetDir):
    """
    Copy a selection of the files stored under dic[key] into targetDir.

    The selection is util.randomList(dic[key], count). One summary line is
    printed before copying and one "finish" line after each copied file.
    """
    chosen = util.randomList(dic[key], count)
    summary = "file copy, type : %s , count : %s , target : %s" % (key, len(chosen), targetDir)
    print(summary)
    for srcPath in chosen:
        util.fileCopy(srcPath, targetDir)
        print("finish : %s" % srcPath)
for key in dic_dis.keys(): disCount += dic_dis[key] for key in dic.keys(): typeCount = int(float(dic_dis[key]) / disCount * modelCount) if typeCount == 0: typeCount = 1 modelMatchFileCopy(dic, key, typeCount, modelDir_dis) for key in dic.keys(): modelMatchFileCopy(dic, key, 10, targetFitsDir) # genSpecDistribute() targetFitsList = util.listDirFiles(targetFitsDir, ".sm") c_1 = 0 c_2 = 0 c_m_1 = 0 c_m_2 = 0 for target in targetFitsList: st = getSpecType(target) print "%s , %s" % (target, st) templatePath, minChi2 = specUtil.modelMatch2(target, modelDir_avg) st1 = getSpecType(templatePath) print "AVG : %s , %s, %s" % (templatePath, minChi2, st1) templatePath_dis, minChi2_dis = specUtil.modelMatch2(target, modelDir_dis) st2 = getSpecType(templatePath_dis)
# Compare the samples poisson and poisson1 with stats.ks_2samp and print the
# new p-value next to p_value, which is computed before the visible text.
# NOTE(review): stats is presumably scipy.stats and pl a pylab/pyplot alias -- confirm.
test2, p_value2 = stats.ks_2samp(poisson, poisson1)
print p_value, p_value2
# Run stats.normaltest on poisson_std and print its second return value.
test3, p3 = stats.normaltest(poisson_std)
print p3
# Plot the pdf of stats.norm(mu, sigma) over t, and x1 over the same t in red.
x = stats.norm(mu, sigma)
pl.plot(t, x.pdf(t), label=u"norm")
pl.plot(t, x1, label=u"poisson", color="red")
# The y-axis label is the Chinese word for "probability".
pl.ylabel(u"概率")
pl.legend()
pl.show()

# Load the result table; the last column holds the tag of each row.
inputFile = "result_v1.txt"
data = np.loadtxt(util.getRelativePath(inputFile))
m, n = data.shape
tags = data[:, n - 1]
# subClassData[tag][j] collects the values of column j over all rows with that tag.
subClassData = {}
for i in range(m):
    tag = tags[i]
    for j in range(n - 1):
        # A value of exactly 99 is skipped.
        # NOTE(review): presumably a "missing value" marker -- confirm.
        if data[i][j] == 99:
            continue
        if not subClassData.has_key(tag):
            subClassData[tag] = []
        # Grow the per-tag list until index j exists.
        while len(subClassData[tag]) - 1 < j:
            subClassData[tag].append([])
        subClassData[tag][j].append(data[i][j])
import os import Astronomy.Util.CommonUtil as astroUtil import CodeLib.Util.CommonUtil as util # waveLens = [1188,1453,2023,2217,2313,2356,2491,3262,3613,3635,3696] waveLens = [1188, 2023, 2217, 2356, 2491, 3613, 3262, 3635, 3696] features = ["MAG1", "MAG2", "MAG3"] subClassKey = "SUBCLASS" classMap = {"O": 1, "B": 2, "A": 3, "F": 4, "G": 5, "K": 6, "M": 7, "N": 8} classCount = {"O": 0, "B": 0, "A": 0, "F": 0, "G": 0, "K": 0, "M": 0, "N": 0} inputPath = "C:\dr2" outFile = "data.txt" outFilePath = util.getRelativePath(outFile) output = open(outFilePath, "w") dirs = os.listdir(inputPath) count = 0 for dir in dirs: files = os.listdir(os.path.join(inputPath, dir)) for fileName in files: filePath = os.path.join(inputPath, dir, fileName) try: keywords, flux = astroUtil.getFitsHeaderAndFlux(filePath) record = [] for fea in features: record.append(keywords[fea])
import numpy as np import CodeLib.Util.CommonUtil as util import Astronomy.Util.CommonUtil as astroUtil from CodeLib.Util import MathUtil inputFile = "data_v4.txt" data = util.loadTxtData(inputFile) m, n = data.shape testCount = int(m * 0.2) testData = data[0:testCount, :] trainData = data[testCount:m, :] subClassData, subClassCount = astroUtil.formatClassifyData(trainData) classMeanVar = {} for key in subClassData.keys(): classMeanVar[key] = [] for i in range(len(subClassData[key])): if i < 3: mean, var, num = MathUtil.calcMeanVarCount(subClassData[key][i]) classMeanVar[key].append([mean, var, num]) else: classMeanVar[key].append(np.sum(subClassData[key][i])) def testData(data): m, n = data.shape tags = data[:, n - 1] errorCount = 0
def genInsertSQL(tableName, paramNum):
    """
    Build an insert statement template holding paramNum "%s" placeholders.

    The placeholders are joined with util.listToString(..., ",") and placed
    inside "INSERT INTO <tableName> VALUES (...)".
    """
    placeholders = ["%s" for _ in range(paramNum)]
    return "INSERT INTO %s VALUES (%s)" % (tableName, util.listToString(placeholders, ","))
import CodeLib.Util.PlotUtil as plotUtil
import CodeLib.Util.CommonUtil as util

# NOTE(review): dfh and np are used below but are not imported in the visible
# lines -- confirm they are brought into scope elsewhere.

# Select mag3 and teff for the rows of dr3.All_Distance with mag3 > 0, plus
# the matching row count.
sql_select_distance = "select mag3,teff from dr3.All_Distance where mag3>0"
sql_count_distance = "select count(*) from dr3.All_Distance where mag3>0"
# NOTE(review): presumably rs holds the mag3 column and teffs the teff column,
# in select order -- confirm against dfh.get2FieldsData.
rs, teffs, count = dfh.get2FieldsData(sql_select_distance, sql_count_distance)

# Bin every r by int(teff / 100); each value is filed under its own bin and
# under the next bin up.
dic = {}
for i in range(count):
    k = int(teffs[i])
    k1 = int(k / 100)
    k2 = k1 + 1
    r = float(rs[i])
    util.addItemToDic(dic, k1, r)
    util.addItemToDic(dic, k2, r)

# Accumulators that are not filled in the visible part of the script.
x_mean = []
y_mean = []
x_3std1 = []
y_3std1 = []
x_3std2 = []
y_3std2 = []

# Replace the values of every bin by those values divided by the bin's
# standard deviation.
for key in dic.keys():
    value = dic[key]
    std = np.std(value)
    # NOTE(review): if value is a plain list this division relies on np.std
    # returning a numpy scalar, and a std of 0 is not guarded -- confirm.
    value_new = value / std
    dic[key] = value_new