Example #1
def backup(**kwargs):
    import os
    import sh
    from datetime import datetime
    from ftplib import FTP

    bakdir = "/var/toughradius/databak"
    if not os.path.exists(bakdir):
        os.mkdir(bakdir)
    now = datetime.now()
    dbname = kwargs.pop('dbname', 'toughradius')
    ftphost = kwargs.pop('ftphost', '127.0.0.1')
    ftpport = kwargs.pop('ftpport', 21)
    ftpuser = kwargs.pop('ftpuser', '')
    ftppwd = kwargs.pop('ftppwd', '')
    backfile = '%s/%s-backup-%s.gz' % (bakdir, dbname, now.strftime("%Y%m%d"))

    # Pipe mysqldump into gzip and write the compressed dump to backfile.
    sh.gzip(sh.mysqldump(u='root',
                         B=dbname,
                         S="/var/toughradius/mysql/mysql.sock"),
            '-cf',
            _out=backfile)

    # Upload the backup over FTP unless the target is the local host.
    if '127.0.0.1' not in ftphost:
        ftp = FTP()
        ftp.set_debuglevel(2)
        ftp.connect(ftphost, ftpport)
        ftp.login(ftpuser, ftppwd)
        ftp.cwd('/')
        bufsize = 1024
        file_handler = open(backfile, 'rb')
        ftp.storbinary('STOR %s' % os.path.basename(backfile), file_handler,
                       bufsize)
        ftp.set_debuglevel(0)
        file_handler.close()
        ftp.quit()
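The nested call above is sh's piping idiom: a finished command passed as the first argument of another command has its output fed to that command's stdin, and `_out` redirects the final stdout to a file. A minimal standalone sketch of the same pattern (file names are placeholders):

import sh

# echo "hello world" | gzip -c > greeting.gz
sh.gzip(sh.echo("hello world"), "-c", _out="greeting.gz")

# Round-trip check: gzip -dc greeting.gz
print(sh.gzip("-dc", "greeting.gz"))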
Example #2
def _test_replica(replica, verbose=False):
    """Test whether a replica has the checksum it reports and whether it passes the gzip test."""

    with temp_dir() as tempdir:
        tempf = os.path.join(tempdir, 'temp.gz')
        if verbose:
            print_("Downloading and checking replica: "+replica)
        dm.backend._get(replica, tempf, verbose=verbose)

        remote_checksum = dm.checksum(replica)
        # 'adler32' here is an external checksum command that sh resolves on PATH.
        local_checksum = sh.adler32(tempf, _tty_out=False).strip()

        if local_checksum != remote_checksum:
            if verbose:
                print_(replica)
                print_("Local checksum %s is different from remote checksum %s."%(local_checksum, remote_checksum))
            return False

        try:
            sh.gzip(tempf, test=True, _tty_out=False)
        except sh.ErrorReturnCode:
            if verbose:
                print_(replica)
                print_("Failed the gzip integrity test.")
            return False
        else:
            return True
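Because sh raises sh.ErrorReturnCode whenever a command exits non-zero, `gzip --test` doubles as a boolean integrity check, which is the trick the function above relies on. A minimal sketch of just that check (the path is a placeholder):

import sh

def is_gzip_intact(path):
    # gzip -t exits non-zero on a corrupt archive; sh turns that into an exception.
    try:
        sh.gzip(path, test=True, _tty_out=False)
    except sh.ErrorReturnCode:
        return False
    return True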
Example #4
def lzw_filter_single(min_complexity, x):
    # Approximate sequence complexity by its gzip compression ratio
    # (sh and itertools.imap are assumed to be imported at module level; Python 2).
    un_comp_len = len(str(x.seq))
    comp_len = sum(imap(len, sh.gzip(f=True, _in=str(x.seq))))
    complexity = comp_len / float(un_comp_len)
    return complexity >= min_complexity
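On Python 3 the same compression-ratio idea can read the `.stdout` bytes of the finished command (as Example #12 below does) instead of iterating over its output. A hedged sketch, assuming a Biopython-style record with a `seq` attribute:

import sh

def gzip_complexity(seq):
    # Ratio of gzip-compressed size to raw size; low values mean repetitive sequence.
    raw = str(seq)
    compressed = sh.gzip("-cf", _in=raw, _tty_out=False).stdout
    return len(compressed) / len(raw)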
Example #5
    def update_cache(self):
        # rm, mkdir, wget, gzip, and pushd are assumed to be imported from sh.
        if not self.test_cache():
            rm(self.path, '-rf')
            mkdir('-p', self.path)

            index_file_url = '/'.join(
                [self.repo_url.url.geturl(), 'Packages.gz'])
            index_file_path = os.path.join(self.path, self.index_file)

            print("Downloading index file '{0}' --> '{1}' ...".format(
                index_file_url, index_file_path))
            try:
                with pushd(self.path):
                    wget(index_file_url, '-O', self.index_file + '.gz')
                    gzip('-d', self.index_file + '.gz')
            except Exception as err:
                print(str(err))
                self.broken = True
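Recent versions of sh provide `pushd` as a context manager: commands inside the `with` block run from the given directory, which is how the wget/gzip pair above lands in `self.path`. A minimal sketch (URL and directory are placeholders):

import sh

# Download and unpack an index inside /tmp without changing the caller's cwd.
with sh.pushd("/tmp"):
    sh.wget("https://example.org/Packages.gz", "-O", "Packages.gz")
    sh.gzip("-d", "-f", "Packages.gz")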
Example #7
def get_files(usaf, wban):
    output = sh.grep("%s %s" % (usaf, wban),
                     "isd-history.txt").strip().split(" ")
    end = int(output.pop()[0:4])
    start = int(output.pop()[0:4])
    sh.mkdir("-p", "%s-%s" % (usaf, wban))
    os.chdir("%s-%s" % (usaf, wban))
    for year in range(start, end + 1):
        fn = "%s-%s-%s.gz" % (usaf, wban, year)
        if not os.path.exists(fn):
            sh.wget("ftp://ftp.ncdc.noaa.gov/pub/data/noaa/%s/%s" % (year, fn))
            print(fn)
    output_fn = "%s-%s-data.csv" % (usaf, wban)
    h = open(output_fn, "w")
    # Equivalent shell pipeline:
    #   zcat *.gz | cut --output-delimiter=, -c16-27,88-92 \
    #       | grep -v '\+9999' | cut --output-delimiter=. -c1-17,18 | sort
    sh.sort(sh.cut(
        sh.grep(
            sh.cut(sh.zcat(glob.glob("*.gz")), "--output-delimiter=,",
                   "-c16-27,88-92"), "-v", "\+9999"), "--output-delimiter=.",
        "-c1-17,18"),
            _out=h)
    h.close()  # flush the sorted output before compressing the file below
    sh.gzip(output_fn)
    sh.mv("%s.gz" % (output_fn), "..")
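`_out` also accepts an already-open file object, not just a file name, so the final stage of a pipeline can stream into a handle you manage yourself. A minimal sketch of that redirection (the file name is a placeholder):

import sh

with open("sorted.txt", "w") as out:
    # ls -1 | sort > sorted.txt
    sh.sort(sh.ls("-1"), _out=out)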
Example #8
def upload_repo(token):
    url = request.form.get('url')
    type_ = request.form.get('type')
    ref = request.form.get('ref')

    s = storage

    user = s.find_user_by_token(token)
    logger.error("user %s uploading repo",user["username"])
    username = user["username"]

    if not url:
        return 'Empty url', 400

    if not type_:
        if url.startswith('git://') or url.endswith('.git'):
            type_ = 'git'
        else:
            return 'Cannot define type of repository by url. Please, specify type.', 400

    if type_ not in ['git', 'cvs', 'hg']:
        return 'Invalid VCS type', 400

    if "UPLOAD_FOLDER" in current_app.config:
        base_clone_path = current_app.config["UPLOAD_FOLDER"]
    else:
        base_clone_path = "/tmp"
    clone_path = "%s/%s" % (base_clone_path, os.path.basename(url))
    if os.path.exists(clone_path):
        sh.rm("-rf", clone_path)

    if type_ == 'git':
        ref = ref or "HEAD"
        sh.git("clone", url, clone_path)

        try:
            ref = sh.git("rev-parse", ref, _cwd=clone_path).strip()
        except sh.ErrorReturnCode as e:
            return 'Invalid reference. %s' % e, 400

        if not os.path.exists(clone_path + "/info.yaml"):
            return 'info.yaml is required', 400

        try:
            package_info = yaml.load(file(clone_path + '/info.yaml'))  # Python 2 file(); use open() + yaml.safe_load on Python 3
            validate_info(package_info)
        except YAMLError:
            return 'Bad encoded info.yaml', 400
        except (ValueError, KeyError) as e:
            return str(e), 400

        try:
            depends_path = download_depends(package_info['depends'], package_info['type'], clone_path)
        except sh.ErrorReturnCode as e:
            return 'Unable to install dependencies. %s' % e, 503

        # remove info.yaml from tar.gz
        with open(clone_path + '/.gitattributes', 'w') as f:
            f.write('info.yaml export-ignore')

        try:
            logger.debug("Packing application to tar.gz")
            sh.git("archive", ref, "--worktree-attributes", format="tar", o="app.tar", _cwd=clone_path),
            if package_info["type"] == "nodejs":
                sh.tar("-uf", "app.tar", "node_modules", _cwd=clone_path)
            elif package_info["type"] == "python":
                sh.tar("-uf", "app.tar", "-C", clone_path + "/depends", *depends_path, _cwd=clone_path)
            sh.gzip("app.tar", _cwd=clone_path)
            package_files = sh.tar('-tf', 'app.tar.gz', _cwd=clone_path)
            package_info['structure'] = [f.strip() for f in package_files]
        except sh.ErrorReturnCode as e:
            return 'Unable to pack application. %s' % e, 503

        try:
            for line in sh.git("log", "-5", date="short", format="%h %ad %s [%an]", _cwd=clone_path):
                line = line.strip()

                # git log output is using ansi terminal codes which is messy for our purposes
                ansisequence = re.compile(r'\x1B\[[^A-Za-z]*[A-Za-z]')
                line = ansisequence.sub('', line)
                line = line.strip("\x1b=\r")
                line = line.strip("\x1b>")
                if not line:
                    continue
                package_info.setdefault('changelog', []).append(line)
        except sh.ErrorReturnCode as e:
            return 'Unable to read the git log. %s' % e, 503


        try:
            with open(clone_path + "/app.tar.gz", "rb") as app:
                package_info['url'] = url
                uuid = upload_app(app, package_info, ref, token)
            return "Application %s was successfully uploaded" % uuid
        except (KeyError, ValueError) as e:
            return str(e), 400

    return "Application was failed to upload", 400
Example #9
def transformExpressionMatrixTo10XMtx(inputPath, outputDir):
    """
    input:
        path or dataframe


    column: gene name
    index: barcodeċ ( without -1 )
    """

    # Recreate the output directory from scratch.
    try:
        sh.mkdir(outputDir)
    except sh.ErrorReturnCode:  # directory already exists; wipe and recreate
        sh.rm("-rf", outputDir)
        sh.mkdir(outputDir)

    if isinstance(inputPath, str):
        expressionMtx = pd.read_table(
            inputPath,
            index_col=0,
        )
    else:
        expressionMtx = inputPath
        expressionMtx.rename_axis("index", inplace=True)
    expressionMtx = expressionMtx.loc[:, expressionMtx.sum(0) != 0]
    barcodes = pd.Series(expressionMtx.index + "-1")
    barcodes.to_csv(f"{outputDir}barcodes.tsv", header=None, index=None)

    feature = pd.DataFrame(expressionMtx.columns)
    feature[1] = feature.iloc[:, 0]
    feature[2] = "Gene Expression"
    feature.to_csv(f"{outputDir}features.tsv",
                   sep="\t",
                   header=None,
                   index=None)

    indexMap = {
        i: k
        for i, k in zip(expressionMtx.index,
                        range(1, 1 + len(expressionMtx.index)))
    }

    featureMap = {
        i: k
        for i, k in zip(expressionMtx.columns,
                        range(1, 1 + len(expressionMtx.columns)))
    }

    expressionMtx.index = expressionMtx.index.map(indexMap)
    expressionMtx.columns = expressionMtx.columns.map(featureMap)
    expressionMtx = expressionMtx.astype(int)
    expressionMtx.reset_index(inplace=True)
    expressionMtx = expressionMtx.melt(id_vars="index")

    expressionMtx.columns = ["barcode", "feature", "count"]
    expressionMtx = expressionMtx.query("count != 0")
    expressionMtx = expressionMtx.reindex(["feature", "barcode", "count"],
                                          axis=1)
    expressionMtx.sort_values(["barcode", "feature"],
                              ascending=[True, False],
                              inplace=True)
    featureCounts, barcodeCounts, rowCounts = (
        max(expressionMtx["feature"]),
        max(expressionMtx["barcode"]),
        len(expressionMtx),
    )
    with open(f"{outputDir}matrix.mtx", "w") as fh:
        fh.write(
            f'%%MatrixMarket matrix coordinate integer general\n%metadata_json: {{"format_version": 2, "software_version": "X.X.0"}}\n{featureCounts} {barcodeCounts} {rowCounts}'
        )
        for line in expressionMtx.itertuples():
            fh.write(f"\n{line.feature} {line.barcode} {line.count}")

    sh.gzip(*glob.glob(f"{outputDir}*"))  # compress barcodes.tsv, features.tsv, matrix.mtx
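Star-expanding the glob passes each path as its own argument, mirroring `gzip file1 file2 ...` in the shell. A minimal sketch (the directory is a placeholder):

import glob
import sh

# Compress every .tsv in the output directory, one argument per file.
files = glob.glob("/tmp/out/*.tsv")
if files:
    sh.gzip("-f", *files)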
Example #10
    def _extract(self, file: str, target_dir: str):
        # Open the archive in binary mode and pipe it through gzip -d into cpio -i.
        with open(file, 'rb') as f:
            sh.cpio(sh.gzip(d=True, k=True, _in=f), i=True, _cwd=target_dir)
Example #11
def extractSeq(fastqDir, outDir, lmdbPath, threads, splitInput, cutoff):
    try:
        os.mkdir(outDir)
    except FileExistsError:
        logger.warning(f"{outDir} already exists!")
    if not splitInput:
        allR1Path = glob.glob(f"{fastqDir}*R1*")
        allR2Path = [x.replace("R1", "R2") for x in allR1Path]
    else:

        fastqTemp = outDir + "tempSplited/"
        try:
            sh.mkdir(fastqTemp)
        except sh.ErrorReturnCode:
            logger.warning(f"{fastqTemp} already exists!")

        allR1Path = glob.glob(f"{fastqDir}*_R1*")
        allR2Path = [x.replace("R1", "R2") for x in allR1Path]
        allSplitedPath = [
            fastqTemp + re.search(r"[\w\W]+?(?=_R1)",
                                  x.split("/")[-1])[0] + "/" for x in allR1Path
        ]

        if allR1Path[0].endswith(".gz"):
            formatGz = True
        else:
            formatGz = False

        splitedNum = threads // len(allSplitedPath)

        if splitedNum <= 1:
            allR1Path = glob.glob(f"{fastqDir}*R1*")
            allR2Path = [x.replace("R1", "R2") for x in allR1Path]
            if allR1Path[0].endswith(".gz"):
                logger.error("format gz, please uncompress it.")
                raise ValueError("gz-formatted input must be uncompressed first")
        else:
            mPResults = []
            with multiP(threads // 2) as mP:
                for singleR1Path, singleR2Path, singleSplitedPath in zip(
                        allR1Path, allR2Path, allSplitedPath):
                    mPResults.append(
                        mP.submit(
                            sh.seqkit,
                            "split2",
                            "-f",
                            "-1",
                            singleR1Path,
                            "-2",
                            singleR2Path,
                            p=splitedNum,
                            O=singleSplitedPath,
                            j=2,
                        ))

            tempAllSplitedR1Path = glob.glob(f"{fastqTemp}*/*R1*")
            tempAllSplitedR2Path = [
                x.replace("R1", "R2") for x in tempAllSplitedR1Path
            ]
            sampleId = set([
                re.search(r"(?<=tempSplited/)[\w\W]+?(?=_L)", x)[0]
                for x in tempAllSplitedR1Path
            ])

            if len(sampleId) != 1:
                allSample = ", ".join(sampleId)
                logger.warning(f"MORE THAN ONE INPUT SAMPLES: {allSample}")
                sampleId = sampleId.pop()
                logger.warning(f"The prefix will change to {sampleId}")
            else:
                sampleId = sampleId.pop()

            i = 0
            formatGzUseThreadContents = []
            for tempSingleSplitedR1Path, tempSingleSplitedR2Path in zip(
                    tempAllSplitedR1Path, tempAllSplitedR2Path):
                i += 1
                if formatGz:
                    sh.mv(
                        tempSingleSplitedR1Path,
                        f"{fastqTemp}{sampleId}_L{i:03}_R1_001.fastq.gz",
                    )
                    sh.mv(
                        tempSingleSplitedR2Path,
                        f"{fastqTemp}{sampleId}_L{i:03}_R2_001.fastq.gz",
                    )
                    formatGzUseThreadContents.append(
                        sh.gzip(
                            "-d",
                            f"{fastqTemp}{sampleId}_L{i:03}_R1_001.fastq.gz",
                            _bg=True,
                        ))
                    formatGzUseThreadContents.append(
                        sh.gzip(
                            "-d",
                            f"{fastqTemp}{sampleId}_L{i:03}_R2_001.fastq.gz",
                            _bg=True,
                        ))
                else:
                    sh.mv(
                        tempSingleSplitedR1Path,
                        f"{fastqTemp}{sampleId}_L{i:03}_R1_001.fastq",
                    )
                    sh.mv(
                        tempSingleSplitedR2Path,
                        f"{fastqTemp}{sampleId}_L{i:03}_R2_001.fastq",
                    )
            if formatGz:
                # Wait for all background gzip -d processes to finish.
                [x.wait() for x in formatGzUseThreadContents]

            for singleTempDir in glob.glob(f"{fastqTemp}*/"):
                sh.rmdir(singleTempDir)

            allR1Path = glob.glob(f"{fastqTemp}*R1*")
            allR2Path = [x.replace("R1", "R2") for x in allR1Path]

    allSubProcess = []
    with multiP(threads) as mP:
        for singleR1Path, singleR2Path in zip(allR1Path, allR2Path):
            allSubProcess.append(
                mP.submit(
                    processOneFastq,
                    singleR1Path,
                    singleR2Path,
                    lmdbPath,
                    outDir,
                    cutoff,
                ))
    [x.result() for x in allSubProcess]

    if not splitInput:
        pass
    else:
        sh.rm("-rf", fastqTemp)
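`_bg=True` launches a command without blocking and returns a handle whose `.wait()` joins it later, which is how the function above decompresses many lane files in parallel. A minimal sketch (file names are placeholders):

import sh

# Start several gzip -d processes in parallel, then wait for all of them.
jobs = [sh.gzip("-d", path, _bg=True) for path in ("a.fastq.gz", "b.fastq.gz")]
for job in jobs:
    job.wait()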
Example #12
    def gzip(self, content):
        # Feed `content` to gzip on stdin and return the compressed bytes.
        return sh.gzip('--best', '--stdout', _tty_out=False, _in=content).stdout
Example #13
            '-o', 'PubkeyAuthentication=no',
            'debian@{}/gateway.log*'.format(args.gateway), '{}/'.format(tmpdirname))
    print('Done copying, start parsing')

    devices = {}

    logs = glob.glob(tmpdirname + '/gateway.log*')
    for log in logs:
        name, ext = os.path.splitext(log)

        print('Handling {}'.format(log))

        # Uncompress this log file if needed
        if ext == '.gz':
            print('Have to uncompress first')
            gzip('-d', log)
            log = log[:-3]

        # Open it to read all of the JSON blobs
        with open(log) as f:
            print('Opened {} and parsing JSON'.format(log))

            for l in f:
                try:
                    blob = json.loads(l)

                    # Check if we can identify this node
                    if '_meta' in blob:
                        id = blob['_meta']['device_id']

                        # Have to create the data structures if this is the
Example #14
import fileinput
import struct

from sh import gzip  # requires the sh package: pip3 install sh

#=================
# file names are hard-wired - too lazy to make a better user interface
inputFileName = "indexTrain.html"
gzFileName = "indexTrain.html.gz"
outputFileName = "indexTrain.h"

#=================
# first compress the file
# keep the input file, force overwrite of existing out file, put Name of file inside the GZ
gzip("-k", "-f", "-N", inputFileName)

#=================
# now convert the bytes in the GZIP to hex text
gzFile = open(gzFileName, 'rb')
outFile = open(outputFileName, 'w')

hexCount = 0
byteCount = 0

# first write header lines for the .h file as required by the C++ code that will use it
outFile.write(
    "#define index_ov2640_html_gz_len NNNN\nconst uint8_t index_ov2640_html_gz[] = {\n"
)

# deal with each byte
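The snippet is cut off before the byte loop. Purely as a hedged sketch of a plausible continuation (the 12-bytes-per-line layout and the closing brace are assumptions, not the original code), the loop would emit each byte of the .gz file as a hex literal:

# Hypothetical continuation: emit each byte as 0xNN, 12 per line.
data = gzFile.read()
for b in data:
    outFile.write("0x{:02x}, ".format(b))
    hexCount += 1
    if hexCount == 12:
        outFile.write("\n")
        hexCount = 0
byteCount = len(data)

outFile.write("\n};\n")
gzFile.close()
outFile.close()
# The NNNN placeholder written earlier would still need replacing with byteCount.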
Example #15
    def convert_tar_to_targz(tar_file):
        lgr.debug('Converting tar to tar.gz...')
        sh.gzip(tar_file)  # replaces tar_file with tar_file.gz in place