Code example #1
File: postprocess.py Project: imsorry1121/sn_crawler
def getMnaFeature():
    # Rewrite ranker-format feature lines as MNA features: keep the label
    # (first token) and the raw "index:value" feature tokens, dropping the
    # second token and the last two tokens of each line.
    with open(ut.getFileLocation(outputPath, featureMnaFilename), "w") as fo:
        with open(ut.getFileLocation(outputPath, featureRankFilename), "r") as fi:
            for line in fi:
                data = line.strip().split(" ")
                gt = data[0]
                features = data[2:-2]
                fo.write(" ".join([gt] + features) + "\n")
Code example #2
File: postprocess.py Project: imsorry1121/sn_crawler
def getNmFeature():
    # Emit the label plus the values of the first two "index:value"
    # feature tokens as one CSV row per input line.
    with open(ut.getFileLocation(outputPath, featureNmFilename), "w") as fo:
        with open(ut.getFileLocation(outputPath, featureRankFilename), "r") as fi:
            for line in fi:
                data = line.strip().split(" ")
                gt = data[0]
                features = [f.split(":")[1] for f in data[2:4]]
                fo.write(",".join([gt] + features) + "\n")
Code example #3
File: postprocess.py Project: sychen1121/sn_crawler
def getMnaFeature():
    with open(ut.getFileLocation(outputPath, featureMnaFilename), "w") as fo:
        with open(ut.getFileLocation(outputPath, featureRankFilename),
                  "r") as fi:
            for line in fi:
                data = line.strip().split(" ")
                gt = data[0]
                features = data[2:-2]
                fo.write(" ".join([gt] + features) + "\n")
Code example #4
File: postprocess.py Project: sychen1121/sn_crawler
def getNmFeature():
    with open(ut.getFileLocation(outputPath, featureNmFilename), "w") as fo:
        with open(ut.getFileLocation(outputPath, featureRankFilename),
                  "r") as fi:
            for line in fi:
                data = line.strip().split(" ")
                gt = data[0]
                features = [f.split(":")[1] for f in data[2:4]]
                fo.write(",".join([gt] + features) + "\n")
Code example #5
File: postprocess.py Project: imsorry1121/sn_crawler
def getSvmFeature():
    # Convert ranker-format lines to CSV rows for an SVM: label, group id
    # (value of the second token after ":"), target id (last token), then
    # the feature values, zero-padded on the right to a fixed width of 50.
    with open(ut.getFileLocation(outputPath, featureSvmFilename), "w") as fo:
        with open(ut.getFileLocation(outputPath, featureRankFilename), "r") as fi:
            for line in fi:
                data = line.strip().split(" ")
                gt = data[0]
                gid = data[1].split(":")[1]
                tid = data[-1]
                features = [f.split(":")[1] for f in data[2:-2]]
                if len(features) < 50:
                    features += ["0"] * (50 - len(features))
                fo.write(",".join([gt, gid, tid] + features) + "\n")
Code example #6
File: postprocess.py Project: sychen1121/sn_crawler
def getSvmFeature():
    with open(ut.getFileLocation(outputPath, featureSvmFilename), "w") as fo:
        with open(ut.getFileLocation(outputPath, featureRankFilename),
                  "r") as fi:
            for line in fi:
                data = line.strip().split(" ")
                gt = data[0]
                gid = data[1].split(":")[1]
                tid = data[-1]
                features = [f.split(":")[1] for f in data[2:-2]]
                if len(features) < 50:
                    features += ["0"] * (50 - len(features))
                fo.write(",".join([gt, gid, tid] + features) + "\n")
Code example #7
File: model.py Project: sychen1121/sn_crawler
def mnaConstraint(n=1558):
    # Collect one score per prediction line (the second whitespace-separated
    # field), skipping the header line, then hand the scores to the
    # project's oneMapping constraint over n candidates.
    scores = list()
    with open(ut.getFileLocation(predPath, predictionMnaOriginFilename), "r") as fi:
        fi.readline()
        for line in fi:
            scores.append(float(line.strip().split()[1]))
    oneMapping(scores, predictionMnaConstraintFilename, n)
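mnaConstraint only covers the file-reading side; oneMapping is a project-local helper not shown here. The code implies a prediction file with a header line followed by rows whose second field is the score. A hypothetical illustration of that parsing:

# Hypothetical file contents; the parsing mirrors mnaConstraint above.
lines = ["id score", "a 0.731", "b 0.204"]
scores = [float(line.strip().split()[1]) for line in lines[1:]]
print(scores)  # [0.731, 0.204]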
Code example #8
import sys

# `utility` and the get*Table* helpers are project-local modules (not shown).
def main():
    spark = utility.createSparkSession("Attunity EH Archiver to Data Lake")
    sliceDt = sys.argv[1]      # date slice to archive
    data_source = sys.argv[2]  # source name used to build the input path
    file_location = utility.getFileLocation(data_source, sliceDt)
    print(file_location)

    base = utility.getBase(spark, file_location)
    ub = utility.getunpackBody(spark, base)
    if ub.count() > 0:
        # Attunity specific stuff
        tableList = getTableList(spark, ub)
        sch = getConfiguredTableList()
        for tnameRow in tableList:
            try:
                getunpackBodyData(
                    spark, ub, tnameRow['tname'],
                    getConfiguredTableSchemaDefinition(sch, tnameRow['tname']))
                print("Successful: %s" % tnameRow['tname'])
            except Exception:
                # Report the failing table, then re-raise to abort the run.
                print("Failed: %s" % tnameRow['tname'])
                raise
    else:
        print("Source file %s and all subdirectories are Empty" % file_location)
    spark.stop()
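This main reads its slice date and data source from the command line, so a launch would look roughly like the following, where the script name and argument values are placeholders:

spark-submit attunity_archiver.py 2018-01-01 attunity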
Code example #9
def main():
    sliceDt = sys.argv[1]  # date slice to archive
    file_location = utility.getFileLocation(DATA_SOURCE, sliceDt)
    print(file_location)

    spark = utility.createSparkSession("Tealium EH Archiver to Data Lake")
    base = utility.getBase(spark, file_location)
    ub = utility.getunpackBody(spark, base)
    if ub.count() > 0:
        # Parse each unpacked body row to a JSON record, then append
        # the resulting DataFrame to the target table.
        unpackBodyData = spark.read.json(ub.rdd.map(extractData))
        unpackBodyData.write.saveAsTable(TARGET_TABLE_NAME, mode="append")
    else:
        print("Source file %s and all subdirectories are Empty" % file_location)
    spark.stop()
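The common pattern in examples #8 and #9 is turning an RDD of JSON strings into a DataFrame and appending it to a table. Below is a minimal self-contained sketch of that pattern; the session name, sample payloads, and table name are placeholders, and extractData is a project-local mapper not shown above:

from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .appName("archiver-sketch") \
    .enableHiveSupport() \
    .getOrCreate()

# Stand-in for ub.rdd.map(extractData): an RDD of raw JSON strings.
payloads = spark.sparkContext.parallelize(
    ['{"event": "view", "ts": 1}', '{"event": "click", "ts": 2}'])

df = spark.read.json(payloads)  # schema is inferred from the JSON strings
df.write.saveAsTable("demo_events", mode="append")  # append, as above
spark.stop()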