Example #1
    def get_scheme(self, url):
        '''
        Get the deduplication key for a URL.
        '''
        scheme = "%s%s" % (self.get_path(url), self.get_params(url))
        scheme = md5(scheme)
        return scheme
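Most of these examples call a project-local md5() (or common.md5()) helper rather than hashlib directly. A minimal sketch of what such a string-hashing helper presumably looks like, consistent with Example #1's usage (the UTF-8 encoding choice is an assumption):

import hashlib

def md5(s):
    # assumed helper: hex MD5 digest of a string (bytes pass through unchanged)
    if isinstance(s, str):
        s = s.encode('utf-8')
    return hashlib.md5(s).hexdigest()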
Example #2
    def prepare(self):
        self.ammunitions = []
        _num = random.randint(6, 18)
        # index of the real attack within the mixed flows; the original
        # randint(1, 18) could fall outside range(_num), so no real request
        # would ever be sent
        _real = random.randint(0, _num - 1)
        logger.info("[+] Mix Flow Number : %d" % _num)
        for i in range(_num):
            rsa = 0
            if random.choice([1, 0]):
                shell_name = self.random_shell_name()
            else:
                shell_name = self.shell
            if _real == i:
                # the real attack traffic
                shell_name = self.shell
                if self.passwd:
                    data = {self.passwd: self.payload}
                else:
                    data = self.payload
                    rsa = 1
                logger.debug("[#] Real Attack %s" % shell_name)
            elif i % 2 == 0:
                data = {
                    'p': md5(str(random.randint(1000000, 1000050))),
                    'c': self.random_data()
                }
            else:
                data = b64e(self.random_bytes())
                rsa = 1
            headers = {"User-Agent": random.choice(USER_AGENTS)}
            self.ammunitions.append({
                "data": data,
                "headers": headers,
                "name": shell_name,
                "rsa": rsa
            })
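b64e is not defined in these examples; assuming it is a thin base64 wrapper that returns text, a sketch might be:

import base64

def b64e(data):
    # assumed helper: base64-encode bytes and return an ASCII string
    return base64.b64encode(data).decode('ascii')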
Example #3
    def post(self):
        password = self.get_argument('password', '')
        newpassword = self.get_argument('newpassword', '')
        confirmpassword = self.get_argument('confirmpassword', '')

        user = BlogUser.get_user_by_username(self.current_user)
        email = self.get_argument('email', user.email)
        if password and newpassword and confirmpassword:
            if newpassword == confirmpassword:
                user = self.db.get("select * from py_user where username=%s limit 1", self.current_user)
                if user:
                    if user.password == common.md5(password):
                        rowcount = BlogUser.update_user(self.current_user, email, common.md5(newpassword))
                        if rowcount == 1:
                            self.set_secure_cookie(AUTH_COOKIE_NAME, '', expires_days=-10)

                            self.flash(u"Password changed successfully. Please log in again.")
                        else:
                            self.flash(u"Failed to save the new password")
                    else:
                        self.flash(u"The original password is incorrect")
            else:
                self.flash(u"The two passwords entered do not match")
        else:
            BlogUser.update_user(self.current_user, email, user.password)
        self.redirect('')
Example #4
def verify_user(phone, password):
    account = conn.db['account']
    try:
        # renamed from `re` to avoid shadowing the re module; dict.get replaces
        # the Python 2-only has_key()
        record = account.find_one({'phone': int(phone)})
        return (record is not None
                and record.get('password') == common.md5(password)
                and record.get('super_member') == 1)
    except Exception as e:
        print(e)
        return False
Example #5
def getImage(imgurl, lock=fileLock):
    content = getHtmlwithBaiduCookie(imgurl)
    if not content:
        print('getImage failure')
        return
    imgname = md5(imgurl) + '.jpg'
    saveasImage(imgname, content)
    with lock:
        savetoDownloaded(imgurl, imgname)
Example #6
def verify_user(username, password):
    db = Database('verify_user')
    # the placeholder must not be quoted when the value is passed as a parameter
    row = db.query(
        """select password from account where number=%s and status = 1 """,
        (username, ), True)
    if row and 'password' in row and row['password'] == common.md5(password):
        return True

    return False
Example #7
    def post(self):
        username = self.get_argument('username', '')
        password = self.get_argument('password', '')

        rememberme = int(self.get_argument('rememberme', 1))
        if self.db.get("select * from py_user where username=%s and password=%s limit 1", username,
                       common.md5(password)):
            self.set_secure_cookie(AUTH_COOKIE_NAME, username, httponly=True, expires_days=rememberme)
            self.redirect('/admin/')
        else:
            self.flash(u'Username and password do not match')
            self.redirect('/admin/login/')
Example #8
File: node.py Project: gcd0318/sync
    def load_incoming_file_to_store(self, limit=DEF_LIMIT):
        for pair in self.dao.fetch_new_from_main(limit):
            fullname, src_md5, size = pair
            src_fullname = self.incoming + fullname
            dest_fullname = self.store + fullname
            dest_path, filename = os.path.split(dest_fullname)
            if not os.path.exists(dest_path):
                os.makedirs(dest_path)  # create intermediate directories as needed
            shutil.copyfile(src_fullname, dest_fullname)
            # verify the copy against the checksum recorded upstream
            if md5(dest_fullname) == src_md5:
                self.dao.update_file_info_to_main(
                    {'status': '+1'}, ["fullname='" + fullname + "'"])
                self.dao.add_file_info_to_local(fullname, src_md5, size)
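Examples #8, #16, #25 and the DNAnexus scripts below hash whole files, so the md5() / common.md5() helper there presumably takes a path. A chunked-read sketch of such a file hasher (the chunk size is an arbitrary choice here):

import hashlib

def md5(path, chunk_size=1 << 20):
    # assumed helper: hex MD5 of a file, read in chunks to bound memory use
    digest = hashlib.md5()
    with open(path, 'rb') as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()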
Example #9
def test(retry=None):
    db = Database('test')
    if retry is None:
        sql = "select * from news where status = 1"
    else:
        sql = "select * from news where status = 1 and news_id not in (select news_id from detail)"

    re = db.query(sql)
    del db

    for news in re:
        print(insert_detail(common.md5(news['source_url']), news['news_id']))

    return '<center><h1> %d </h1></center>' % len(re)
Example #10
def hotList(max=5, type=6):
    db = Database('hotlist')
    if type != 6:
        re = db.query(
            """select * from news where type = %d and status = 1 order by read_count desc limit %d offset 0""",
            (type, max))
    else:
        re = db.query(
            """select * from news where status = 1 order by read_count desc limit %d offset 0""",
            (max, ))

    # enumerate avoids the O(n^2) re.index() lookup and shadowing the list builtin
    results = []
    for order, news in enumerate(re, start=1):
        news['target'] = common.md5(news['source_url'])
        news['order'] = order
        results.append(news)

    return results
Example #11
def create_user(username, password, email):
    # create a user, but check if user exists first
    if User.objects(username=username).count() > 0:
        raise ServerError(ServerErrorCodes.ERR_INPUT_ERROR,
                          message='username already exists')

    # md5 hash the string
    hashed_password = md5(password, hash_secret)

    print(hashed_password)

    # save the user
    user = User()
    user.username = username
    user.password = hashed_password
    user.email = email
    user.save()

    return None
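Example #11 passes a second hash_secret argument, which suggests a salted variant of the helper. A hypothetical two-argument version (the concatenation order is an assumption):

import hashlib

def md5(s, secret=''):
    # assumed helper: hex MD5 of the string with an optional secret appended as a salt
    return hashlib.md5((s + secret).encode('utf-8')).hexdigest()

Even salted, MD5 is a poor choice for password storage; a key-derivation function such as bcrypt or scrypt would be the modern replacement.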
Example #12
    def post(self):
        import common

        username = self.get_argument('username', '')
        if not username:
            self.flash(u'Please enter a username')
            self.redirect('')
        else:
            user = BlogUser.get_user_by_username(username)
            if not user:
                self.flash(u'Username does not exist')
                self.redirect('')
            else:
                kvdata = {"user": username, "email": user.email}
                key = common.md5(common.randomstr(20))
                common.set_kvdb_value(key, kvdata)
                url = 'http://%s/reset/%s/' % (self.request.host, key)
                common.sendmail(user.email, u'Reset password', url)
                self.flash(u'Email sent successfully')
                self.redirect('')
Example #13
def insert_detail(target, id):
    if target is None or id is None:
        return 'Parameters must not be empty'

    # the original mixed common.md5 and __md5 for the same comparison;
    # unified on common.md5 here
    if target == common.md5(s_meiyou.SOURCE_HOST):
        content = s_meiyou.detail(id)
        return content

    if target == common.md5(s_dayima.SOURCE_HOST):
        content = s_dayima.detail(id)
        return content

    if target == common.md5(s_yidianzixun.SOURCE_HOST):
        content = s_yidianzixun.detail(id)
        return content

    if target == common.md5(s_sohu.SOURCE_HOST):
        content = s_sohu.detail(id)
        return content

    return 'No matching target found'
Example #14
    def post(self, key):
        import common

        value = common.get_kvdb_value(key)
        if not value:
            self.write(u'Sorry, the token has expired')
            return
        password = self.get_argument('password', '')
        confirm = self.get_argument('confirmpassword', '')
        if not password or not confirm:
            self.flash(u'Password must not be empty')
            self.redirect('')
            return
        if password != confirm:
            self.flash(u'The two passwords entered do not match')
            self.redirect('')
            return
        BlogUser.update_user(value['user'], value['email'], common.md5(password))
        common.delete_kvdb(key)
        self.set_secure_cookie(AUTH_COOKIE_NAME, '', expires_days=-7)
        self.write(u'Password reset successful')
Example #15
def update_md5(db):
    limit = 1000
    skip = 0

    while True:
        rows = db.fetchall(
            "select id, url from " + table_name +
            " where id <= %s and id > %s", (limit + skip, skip))

        if not len(rows): break

        skip += limit

        for row in rows:
            id, url = row
            if not url: continue
            # placeholders must not be quoted when values are passed as parameters
            db.execute(
                "update " + table_name + " set url_md5=%s where id = %s",
                (md5(url), id))

        db.commit()

    db.close()
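update_md5 expects a wrapper object rather than a raw DB-API connection (its fetchall takes SQL plus parameters). A hypothetical invocation, with every name here an assumption:

table_name = 'news'  # assumed module-level table with id, url and url_md5 columns
db = Database('md5_backfill')  # assumed wrapper exposing fetchall/execute/commit/close
update_md5(db)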
Example #16
def process_files(root, files, cursor, connection):
    for file in files:
        if file.startswith('.'):
            continue
        path_ = os.path.join(root, file)
        try:
            type_ = magic.from_file(path_, mime=True)
        except PermissionError:
            type_ = None
        try:
            md5_ = md5(path_)
        except PermissionError:
            md5_ = None
        size = os.path.getsize(path_)
        cursor.execute('''
        SELECT EXISTS(SELECT * FROM files WHERE path_=?);
        ''', (path_,))
        fetched = cursor.fetchone()
        if fetched == (1, ):
            cursor.execute('''
            SELECT * FROM files WHERE path_=? AND (type_!=? OR md5!=? OR size!=?)
            ''', (path_, type_, md5_, size))
            fetched = cursor.fetchone()
            if fetched:
                cursor.execute('''
                UPDATE files SET type_=?, md5=?, size=?
                WHERE path_=?
                ''', (type_, md5_, size, path_))
                print('UPDATE PATH={}'.format(path_))
            else:
                print('SKIP PATH={}'.format(path_))
        else:
            cursor.execute('''
            INSERT INTO files (path_, type_, md5, "size") VALUES (?, ?, ?, ?)
            ''', (path_, type_, md5_, size))
            print('INSERT PATH={}'.format(path_))
        connection.commit()
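process_files handles a single directory's file list, so a driver loop is implied. A sketch of how it might be wired up with os.walk (the database path and scan root are assumptions):

import os
import sqlite3

connection = sqlite3.connect('files.db')  # assumed database path
cursor = connection.cursor()
for root, dirs, files in os.walk('/data'):  # assumed scan root
    process_files(root, files, cursor, connection)
connection.close()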
Example #17
def main(input_bam, paired_end, spp_version):

    # The following line(s) initialize your data object inputs on the platform
    # into dxpy.DXDataObject instances that you can start using immediately.

    input_bam_file = dxpy.DXFile(input_bam)

    input_bam_filename = input_bam_file.name
    input_bam_basename = input_bam_file.name[:-len('.bam')]  # rstrip('.bam') strips characters, not a suffix
    dxpy.download_dxfile(input_bam_file.get_id(), input_bam_filename)

    intermediate_TA_filename = input_bam_basename + ".tagAlign"
    if paired_end:
        end_infix = 'PE2SE'
    else:
        end_infix = 'SE'
    final_TA_filename = input_bam_basename + '.' + end_infix + '.tagAlign.gz'

    # ===================
    # Create tagAlign file
    # ===================

    out, err = common.run_pipe([
        "bamToBed -i %s" % (input_bam_filename),
        r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'""",
        "tee %s" % (intermediate_TA_filename), "gzip -cn"
    ],
                               outfile=final_TA_filename)

    # ================
    # Create BEDPE file
    # ================
    if paired_end:
        final_BEDPE_filename = input_bam_basename + ".bedpe.gz"
        # need namesorted bam to make BEDPE
        final_nmsrt_bam_prefix = input_bam_basename + ".nmsrt"
        final_nmsrt_bam_filename = final_nmsrt_bam_prefix + ".bam"
        samtools_sort_command = \
            "samtools sort -n %s %s" % (input_bam_filename, final_nmsrt_bam_prefix)
        logger.info(samtools_sort_command)
        subprocess.check_output(shlex.split(samtools_sort_command))
        out, err = common.run_pipe([
            "bamToBed -bedpe -mate1 -i %s" %
            (final_nmsrt_bam_filename), "gzip -cn"
        ],
                                   outfile=final_BEDPE_filename)

    # =================================
    # Subsample tagAlign file
    # ================================
    logger.info("Intermediate tA md5: %s" %
                (common.md5(intermediate_TA_filename)))
    NREADS = 15000000
    if paired_end:
        end_infix = 'MATE1'
    else:
        end_infix = 'SE'
    subsampled_TA_filename = \
        input_bam_basename + \
        ".filt.nodup.sample.%d.%s.tagAlign.gz" % (NREADS/1000000, end_infix)
    steps = [
        'grep -v "chrM" %s' % (intermediate_TA_filename),
        'shuf -n %d --random-source=%s' % (NREADS, intermediate_TA_filename)
    ]
    if paired_end:
        steps.extend([r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""])
    steps.extend(['gzip -cn'])
    out, err = common.run_pipe(steps, outfile=subsampled_TA_filename)
    logger.info("Subsampled tA md5: %s" % (common.md5(subsampled_TA_filename)))

    # Calculate Cross-correlation QC scores
    CC_scores_filename = subsampled_TA_filename + ".cc.qc"
    CC_plot_filename = subsampled_TA_filename + ".cc.plot.pdf"

    # CC_SCORE FILE format
    # Filename <tab>
    # numReads <tab>
    # estFragLen <tab>
    # corr_estFragLen <tab>
    # PhantomPeak <tab>
    # corr_phantomPeak <tab>
    # argmin_corr <tab>
    # min_corr <tab>
    # phantomPeakCoef <tab>
    # relPhantomPeakCoef <tab>
    # QualityTag

    spp_tarball = SPP_VERSION_MAP.get(spp_version)
    assert spp_tarball, "spp version %s is not supported" % (spp_version)
    # install spp
    subprocess.check_output(shlex.split('R CMD INSTALL %s' % (spp_tarball)))
    # run spp
    run_spp_command = '/phantompeakqualtools/run_spp_nodups.R'
    out, err = common.run_pipe([
        "Rscript %s -c=%s -p=%d -filtchr=chrM -savp=%s -out=%s" %
        (run_spp_command, subsampled_TA_filename, cpu_count(),
         CC_plot_filename, CC_scores_filename)
    ])
    out, err = common.run_pipe(
        [r"""sed -r  's/,[^\t]+//g' %s""" % (CC_scores_filename)],
        outfile="temp")
    out, err = common.run_pipe(["mv temp %s" % (CC_scores_filename)])

    tagAlign_file = dxpy.upload_local_file(final_TA_filename)
    if paired_end:
        BEDPE_file = dxpy.upload_local_file(final_BEDPE_filename)

    CC_scores_file = dxpy.upload_local_file(CC_scores_filename)
    CC_plot_file = dxpy.upload_local_file(CC_plot_filename)
    xcor_qc = xcor_parse(CC_scores_filename)

    # Return the outputs
    output = {
        "tagAlign_file": dxpy.dxlink(tagAlign_file),
        "CC_scores_file": dxpy.dxlink(CC_scores_file),
        "CC_plot_file": dxpy.dxlink(CC_plot_file),
        "paired_end": paired_end,
        "RSC": float(xcor_qc.get('relPhantomPeakCoef')),
        "NSC": float(xcor_qc.get('phantomPeakCoef')),
        "est_frag_len": float(xcor_qc.get('estFragLen'))
    }
    if paired_end:
        output.update({"BEDPE_file": dxpy.dxlink(BEDPE_file)})
    return output
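xcor_parse is not shown in these examples; given the CC_SCORE column order documented in the comments above, a plausible sketch (the name and return shape are inferred from how the result is used) is:

def xcor_parse(fname):
    # assumed helper: parse the first line of a CC_SCORE file into a dict
    # keyed by the column names documented in the comment block above
    columns = ['Filename', 'numReads', 'estFragLen', 'corr_estFragLen',
               'PhantomPeak', 'corr_phantomPeak', 'argmin_corr', 'min_corr',
               'phantomPeakCoef', 'relPhantomPeakCoef', 'QualityTag']
    with open(fname) as fh:
        values = fh.readline().rstrip('\n').split('\t')
    return dict(zip(columns, values))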
Example #18
def main():

    import argparse
    parser = argparse.ArgumentParser(
        description=__doc__, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--key', default='default',
                        help="The keypair identifier from the keyfile for the server.  Default is --key=default")

    parser.add_argument('--keyfile', default=os.path.expanduser("~/keypairs.json"),
                        help="The keypair file.  Default is --keyfile=%s" % (os.path.expanduser("~/keypairs.json")))

    parser.add_argument('--infile', '-i',
                        help="CSV file with metadata to update")

    parser.add_argument('--dryrun', default=False, action='store_true',
                        help="Do everything except save changes")

    parser.add_argument('--debug', default=False, action='store_true',
                        help="Print debug messages.  Default is False.")

    parser.add_argument('--put', default=False, action='store_true',
                        help="If property in the input is blank, remove that property entirely from the existing object")

    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)
    else:
        logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

    key = ENC_Key(args.keyfile, args.key)  # get the keypair
    connection = ENC_Connection(key)  # initialize the connection object
    # biosample_collection = ENC_Collection(connection,'biosamples',frame='object')

    with open(args.infile, 'r') as f:
        reader = csv.DictReader(f, delimiter=',', quotechar='"')
        for new_metadata in reader:
            uuid = new_metadata.pop('uuid', None)
            accession = new_metadata.pop('accession', None)
            if uuid:  # use the uuid if there is one
                obj_id = uuid
            elif accession:  # if no uuid then use the accession
                obj_id = accession
            else:  # if neither uuid or accession, assume this is a new object
                obj_id = None
            enc_object = ENC_Item(connection, obj_id)
            # print "Got accessioned object %s with status %s" %(enc_object.get('accession'), enc_object.get('status'))
            for prop in new_metadata:
                if new_metadata[prop].strip() == "":
                    if args.put:  # if empty, pop out the old property from the object
                        old_value = enc_object.properties.pop(prop, None)
                    continue  # skip properties with no value for post or patch
                else:  # new property or new value for old property
                    new_metadata_string = new_metadata[prop]
                    if ':' in prop:
                        prop_name, sep, prop_type = prop.partition(':')
                    else:
                        prop_name = prop
                        prop_type = 'string'
                    if prop_type == 'array':
                        # subreader = csv.reader(StringIO(new_metadata_string), delimiter=',', quotechar='"')
                        # array_items = []
                        # for line in subreader:
                        #   for s in line:
                        #       array_items.append(s)
                        print("new_metadata_string is %s" % (new_metadata_string))
                        array_items = json.loads(new_metadata_string)
                        print("array_items is %s" % (array_items))
                        json_obj = {prop_name: array_items}
                    elif prop_type == 'int' or prop_type == 'integer':
                        json_obj = {prop_name: int(new_metadata_string)}
                    elif prop_type == 'float':
                        json_obj = {prop_name: float(new_metadata_string)}
                    else:
                        json_obj = {prop_name: new_metadata_string}  # default is string
                    enc_object.properties.update(json_obj)
            if 'submitted_file_name' in enc_object.properties:
                path = os.path.expanduser(enc_object.get('submitted_file_name'))
                path = os.path.abspath(path)
                basename = os.path.basename(path)
                enc_object.properties.update({
                    'submitted_file_name': basename,
                    'md5sum': common.md5(path),
                    'file_size': os.path.getsize(path)})
            if obj_id:
                logger.info('Syncing %s' % (obj_id))
            else:
                logger.info('Syncing new object')
            logger.debug('%s' % (json.dumps(enc_object.properties, sort_keys=True, indent=4, separators=(',', ': '))))
            if not args.dryrun:
                new_object = enc_object.sync()
                try:
                    new_accession = new_object['accession']
                except Exception:
                    pass
                else:
                    print("New accession: %s" % (new_accession))
                    if enc_object.type == 'file' and 'submitted_file_name' in json_obj:
                        upload_credentials = enc_object.new_creds()
                        print(upload_credentials)
                        rc = upload_file(upload_credentials, path)
                        print("Upload rc: %d" % (rc))
Example #19
    def random_shell_name(self, ext=".php"):
        return "/" + md5(self.random_str()) + ext
Example #20
# Request URL: the API endpoint to load-test
# url = setUp_()[-1] + "/h5/speak/add"
# fetch the topic list
# url = setUp_()[-1] + "/h5/comment/add"
# fetch the comment list
url = setUp_()[-1] + "/h5/comment/getComment"
# build the request headers
ts = setUp_()[0]
reqId = setUp_()[1]
secret = setUp_()[2]
header = setUp_()[3]
# db = setUp_()[4]
userId = setUp_()[4]
liveId = setUp_()[5]
reqSign = reqId + ':' + secret + ':' + ts
sign = md5(reqSign)
# data = {
#     "id": reqId,
#     "timestamp": ts,
#     "sign": sign,
#     "data": {
#         "topicId": "290000451050003",
#         "type": "text",
#         "liveId": liveId,
#         "content": "I am speaking; the little tail at the end is amazing, isn't it..." + ts,
#         "isReplay": "N",
#         "page": {"size": "20", "page": "1"},
#         "userId": userId
#     }
# }
# payload for adding a comment
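The commented-out block above shows the request body shape; assuming a JSON API, the computed sign might be used like this (the payload fields mirror the comment, everything else is an assumption):

import requests

payload = {
    "id": reqId,
    "timestamp": ts,
    "sign": sign,  # md5 of "reqId:secret:ts", computed above
    "data": {
        "liveId": liveId,
        "userId": userId,
        "page": {"size": "20", "page": "1"}
    }
}
resp = requests.post(url, json=payload, headers=header)
print(resp.status_code, resp.text)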
Example #21
def main(input_bam, paired_end, spp_version):

    # The following line(s) initialize your data object inputs on the platform
    # into dxpy.DXDataObject instances that you can start using immediately.

    input_bam_file = dxpy.DXFile(input_bam)

    input_bam_filename = input_bam_file.name
    input_bam_basename = input_bam_file.name[:-len('.bam')]  # rstrip('.bam') strips characters, not a suffix
    dxpy.download_dxfile(input_bam_file.get_id(), input_bam_filename)

    intermediate_TA_filename = input_bam_basename + ".tagAlign"
    if paired_end:
        end_infix = 'PE2SE'
    else:
        end_infix = 'SE'
    final_TA_filename = input_bam_basename + '.' + end_infix + '.tagAlign.gz'

    # ===================
    # Create tagAlign file
    # ===================

    out, err = common.run_pipe([
        "bamToBed -i %s" % (input_bam_filename),
        r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'""",
        "tee %s" % (intermediate_TA_filename),
        "gzip -cn"],
        outfile=final_TA_filename)

    # ================
    # Create BEDPE file
    # ================
    if paired_end:
        final_BEDPE_filename = input_bam_basename + ".bedpe.gz"
        # need namesorted bam to make BEDPE
        final_nmsrt_bam_prefix = input_bam_basename + ".nmsrt"
        final_nmsrt_bam_filename = final_nmsrt_bam_prefix + ".bam"
        samtools_sort_command = \
            "samtools sort -n %s %s" % (input_bam_filename, final_nmsrt_bam_prefix)
        logger.info(samtools_sort_command)
        subprocess.check_output(shlex.split(samtools_sort_command))
        out, err = common.run_pipe([
            "bamToBed -bedpe -mate1 -i %s" % (final_nmsrt_bam_filename),
            "gzip -cn"],
            outfile=final_BEDPE_filename)

    # =================================
    # Subsample tagAlign file
    # ================================
    logger.info(
        "Intermediate tA md5: %s" % (common.md5(intermediate_TA_filename)))
    NREADS = 15000000
    if paired_end:
        end_infix = 'MATE1'
    else:
        end_infix = 'SE'
    subsampled_TA_filename = \
        input_bam_basename + \
        ".filt.nodup.sample.%d.%s.tagAlign.gz" % (NREADS/1000000, end_infix)
    steps = [
        'grep -v "chrM" %s' % (intermediate_TA_filename),
        'shuf -n %d --random-source=%s' % (NREADS, intermediate_TA_filename)]
    if paired_end:
        steps.extend([r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""])
    steps.extend(['gzip -cn'])
    out, err = common.run_pipe(steps, outfile=subsampled_TA_filename)
    logger.info(
        "Subsampled tA md5: %s" % (common.md5(subsampled_TA_filename)))

    # Calculate Cross-correlation QC scores
    CC_scores_filename = subsampled_TA_filename + ".cc.qc"
    CC_plot_filename = subsampled_TA_filename + ".cc.plot.pdf"

    # CC_SCORE FILE format
    # Filename <tab>
    # numReads <tab>
    # estFragLen <tab>
    # corr_estFragLen <tab>
    # PhantomPeak <tab>
    # corr_phantomPeak <tab>
    # argmin_corr <tab>
    # min_corr <tab>
    # phantomPeakCoef <tab>
    # relPhantomPeakCoef <tab>
    # QualityTag

    # spp_tarball = SPP_VERSION_MAP.get(spp_version)
    # assert spp_tarball, "spp version %s is not supported" % (spp_version)
    # # install spp
    # subprocess.check_output(shlex.split('R CMD INSTALL %s' % (spp_tarball)))
    # run spp
    run_spp_command = '/phantompeakqualtools/run_spp.R'
    out, err = common.run_pipe([
        "Rscript %s -c=%s -p=%d -filtchr=chrM -savp=%s -out=%s"
        % (run_spp_command, subsampled_TA_filename, cpu_count(),
           CC_plot_filename, CC_scores_filename)])
    out, err = common.run_pipe([
        r"""sed -r  's/,[^\t]+//g' %s""" % (CC_scores_filename)],
        outfile="temp")
    out, err = common.run_pipe([
        "mv temp %s" % (CC_scores_filename)])

    tagAlign_file = dxpy.upload_local_file(final_TA_filename)
    if paired_end:
        BEDPE_file = dxpy.upload_local_file(final_BEDPE_filename)

    CC_scores_file = dxpy.upload_local_file(CC_scores_filename)
    CC_plot_file = dxpy.upload_local_file(CC_plot_filename)
    xcor_qc = xcor_parse(CC_scores_filename)

    # Return the outputs
    output = {
        "tagAlign_file": dxpy.dxlink(tagAlign_file),
        "CC_scores_file": dxpy.dxlink(CC_scores_file),
        "CC_plot_file": dxpy.dxlink(CC_plot_file),
        "paired_end": paired_end,
        "RSC": float(xcor_qc.get('relPhantomPeakCoef')),
        "NSC": float(xcor_qc.get('phantomPeakCoef')),
        "est_frag_len": float(xcor_qc.get('estFragLen'))
    }
    if paired_end:
        output.update({"BEDPE_file": dxpy.dxlink(BEDPE_file)})
    return output
Example #22
def accession_file(f, keypair, server, dryrun, force):
	#check for duplication
	#- if it has an ENCFF or TSTFF number in its tags, or
	#- if there exists an accessioned file with the same submitted_file_name that is not deleted, replaced, revoked and has the same size
	#- then there should be a file with the same md5.  If not, warn of a mismatch between what's at DNAnexus and ENCODEd.
	#- If same md5, return the existing object.  
	#- Next, check if there's already a file with the same md5.  If it's deleted, replaced, revoked, then remodel it if --force=true,
	#- Else warn and return None
	#download
	#calculate md5 and add to f.md5sum
	#post file and get accession, upload credentials
	#upload to S3
	#remove the local file (to save space)
	#return the ENCODEd file object
	logger.debug('in accession_file with f %s' %(pprint.pformat(f['submitted_file_name'])))
	dx = f.pop('dx')

	local_fname = dx.name
	logger.info("Downloading %s" %(local_fname))
	dxpy.download_dxfile(dx.get_id(),local_fname)
	f.update({'md5sum': common.md5(local_fname)})
	f['notes'] = json.dumps(f.get('notes'))

	#check to see if md5 already in the database
	url = server + '/md5:%s?format=json&frame=object' %(f.get('md5sum'))
	r = common.encoded_get(url, keypair, return_response=True)
	try:
		r.raise_for_status()
	except Exception:
		if r.status_code == 404:
			logger.info('No md5 matches %s' %(f.get('md5sum')))
			md5_exists = False
		else:
			logger.error('MD5 duplicate check. GET failed: %s %s' % (r.status_code, r.reason))
			logger.error(r.text)
			md5_exists = None
	else:
		md5_exists = r.json()

	#check if an ENCODE accession number is in the list of tags, as it would be if accessioned by this script or similar scripts
	accession_in_tag = False
	for tag in dx.tags:
		m = re.findall(r'ENCFF\d{3}\D{3}', tag)
		if m:
			logger.info('%s appears to contain ENCODE accession number in tag %s.' %(dx.get_id(),m))
			accession_in_tag = True
			# if not force:
			# 	return
			break

	#TODO check here if file is deprecated and, if so, warn
	if md5_exists:
		if force:
			return patch_file(f, keypair, server, dryrun)
		else:
			logger.info("Returning duplicate file unchanged")
			return md5_exists
	else:
		logger.info('posting new file %s' %(f.get('submitted_file_name')))
		logger.debug('%s' %(f))
		new_file_object = post_file(f, keypair, server, dryrun)


	if new_file_object:
		creds = new_file_object['upload_credentials']
		env = os.environ.copy()
		env.update({
			'AWS_ACCESS_KEY_ID': creds['access_key'],
			'AWS_SECRET_ACCESS_KEY': creds['secret_key'],
			'AWS_SECURITY_TOKEN': creds['session_token'],
		})

		logger.info("Uploading file.")
		start = time.time()
		try:
			subprocess.check_call(['aws', 's3', 'cp', local_fname, creds['upload_url'], '--quiet'], env=env)
		except subprocess.CalledProcessError as e:
			# The aws command returns a non-zero exit code on error.
			logger.error("Upload failed with exit code %d" % e.returncode)
		else:
			end = time.time()
			duration = end - start
			logger.info("Uploaded in %.2f seconds" % duration)
			dx.add_tags([new_file_object.get('accession')])

	try:
		os.remove(local_fname)
	except OSError:
		pass

	return new_file_object
Example #23
def main():

    import argparse
    parser = argparse.ArgumentParser(
        description=__doc__,
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '--key',
        default='default',
        help=
        "The keypair identifier from the keyfile for the server.  Default is --key=default"
    )

    parser.add_argument('--keyfile',
                        default=os.path.expanduser("~/keypairs.json"),
                        help="The keypair file.  Default is --keyfile=%s" %
                        (os.path.expanduser("~/keypairs.json")))

    parser.add_argument('--infile',
                        '-i',
                        help="CSV file with metadata to update")

    parser.add_argument('--dryrun',
                        default=False,
                        action='store_true',
                        help="Do everything except save changes")

    parser.add_argument('--debug',
                        default=False,
                        action='store_true',
                        help="Print debug messages.  Default is False.")

    parser.add_argument(
        '--put',
        default=False,
        action='store_true',
        help=
        "If property in the input is blank, remove that property entirely from the existing object"
    )

    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(format='%(levelname)s:%(message)s',
                            level=logging.DEBUG)
    else:
        logging.basicConfig(format='%(levelname)s:%(message)s',
                            level=logging.INFO)

    key = ENC_Key(args.keyfile, args.key)  #get the keypair
    connection = ENC_Connection(key)  #initialize the connection object
    #biosample_collection = ENC_Collection(connection,'biosamples',frame='object')

    with open(args.infile, 'r') as f:
        reader = csv.DictReader(f, delimiter=',', quotechar='"')
        for new_metadata in reader:
            uuid = new_metadata.pop('uuid', None)
            accession = new_metadata.pop('accession', None)
            if uuid:  #use the uuid if there is one
                obj_id = uuid
            elif accession:  #if no uuid then use the accession
                obj_id = accession
            else:  #if neither uuid or accession, assume this is a new object
                obj_id = None
            enc_object = ENC_Item(connection, obj_id)
            #print "Got accessioned object %s with status %s" %(enc_object.get('accession'), enc_object.get('status'))
            submit_new_file = False
            for prop in new_metadata:
                if new_metadata[prop].strip() == "":
                    if args.put:  #if empty, pop out the old property from the object
                        old_value = enc_object.properties.pop(prop, None)
                    continue  #skip properties with no value for post or patch
                else:  #new property or new value for old property
                    new_metadata_string = new_metadata[prop]
                    if ':' in prop:
                        prop_name, sep, prop_type = prop.partition(':')
                    else:
                        prop_name = prop
                        prop_type = 'string'
                    if prop_type == 'array':
                        # subreader = csv.reader(StringIO(new_metadata_string), delimiter=',', quotechar='"')
                        # array_items = []
                        # for line in subreader:
                        # 	for s in line:
                        # 		array_items.append(s)
                        logger.debug("new_metadata_string is %s" %
                                     (new_metadata_string))
                        array_items = json.loads(new_metadata_string)
                        logger.debug("array_items is %s" % (array_items))
                        json_obj = {prop_name: array_items}
                    elif prop_type == 'int' or prop_type == 'integer':
                        json_obj = {prop_name: int(new_metadata_string)}
                    elif prop_type == 'float':
                        json_obj = {prop_name: float(new_metadata_string)}
                    else:
                        json_obj = {
                            prop_name: new_metadata_string
                        }  #default is string
                    if prop == 'submitted_file_name':
                        new_filename = new_metadata_string
                        old_filename = enc_object.properties[
                            'submitted_file_name']
                        if new_filename != old_filename:
                            submit_new_file = True
                    enc_object.properties.update(json_obj)
            if submit_new_file:
                path = os.path.expanduser(
                    enc_object.get('submitted_file_name'))
                path = os.path.abspath(path)
                basename = os.path.basename(path)
                enc_object.properties.update({
                    'submitted_file_name': basename,
                    'md5sum': common.md5(path),
                    'file_size': os.path.getsize(path)
                })
            if obj_id:
                logger.info('Syncing %s' % (obj_id))
            else:
                logger.info('Syncing new object')
            logger.debug('%s' % (json.dumps(enc_object.properties,
                                            sort_keys=True,
                                            indent=4,
                                            separators=(',', ': '))))
            result = enc_object.sync(args.dryrun)
            if not args.dryrun:
                try:
                    assert result['status'] == 'success'
                except Exception:
                    logger.error('New object sync failed ... Skipping. %s' %
                                 (result))
                else:
                    new_object = result['@graph'][0]
                    if 'accession' in new_object:
                        new_id = new_object['accession']
                    else:
                        new_id = new_object['uuid']
                    logger.info("New object: %s" % (new_id))
                    if enc_object.type == 'file' and 'submitted_file_name' in json_obj:
                        upload_credentials = enc_object.new_creds()
                        logger.debug(upload_credentials)
                        rc = upload_file(upload_credentials, path)
                        logger.info("Upload rc: %d" % (rc))
Example #24
def accession_file(f, keypair, server, dryrun, force):
    #check for duplication
    #download
    #calculate md5 and add to f.md5sum
    #post file and get accession, upload credentials
    #upload to S3
    #remove the local file (to save space)
    #return the ENCODEd file object
    already_accessioned = False
    dx = f.pop('dx')
    for tag in dx.tags:
        m = re.search(r'(ENCFF\d{3}\D{3})|(TSTFF\D{6})', tag)
        if m:
            logger.info(
                '%s appears to contain ENCODE accession number in tag %s ... skipping'
                % (dx.get_id(), m.group(0)))
            already_accessioned = True
            break
    if already_accessioned and not force:
        return
    url = urlparse.urljoin(
        server,
        'search/?type=file&submitted_file_name=%s&format=json&frame=object' %
        (f.get('submitted_file_name')))
    r = requests.get(url, auth=keypair)
    try:
        r.raise_for_status()
        if r.json()['@graph']:
            for duplicate_item in r.json()['@graph']:
                if duplicate_item.get('status') == 'deleted':
                    logger.info(
                        "A potential duplicate file was found but its status=deleted ... proceeding"
                    )
                    duplicate_found = False
                else:
                    logger.info("Found potential duplicate: %s" %
                                (duplicate_item.get('accession')))
                    submitted_file_size = dx.describe().get('size')
                    if submitted_file_size == duplicate_item.get('file_size'):
                        logger.info(
                            "%s %s: File sizes match, assuming duplicate." %
                            (str(submitted_file_size),
                             duplicate_item.get('file_size')))
                        duplicate_found = True
                        break
                    else:
                        logger.info(
                            "%s %s: File sizes differ, assuming new file." %
                            (str(submitted_file_size),
                             duplicate_item.get('file_size')))
                        duplicate_found = False
        else:
            duplicate_found = False
    except Exception:
        logger.warning('Duplicate accession check failed: %s %s' %
                       (r.status_code, r.reason))
        logger.debug(r.text)
        duplicate_found = False

    if duplicate_found:
        if force:
            logger.info("Duplicate detected, but force=true, so continuing")
        else:
            logger.info("Duplicate detected, skipping")
            return

    local_fname = dx.name
    logger.info("Downloading %s" % (local_fname))
    dxpy.download_dxfile(dx.get_id(), local_fname)
    f.update({'md5sum': common.md5(local_fname)})
    f['notes'] = json.dumps(f.get('notes'))

    url = urlparse.urljoin(server, 'files/')
    if dryrun:
        logger.info("Dry run.  Would POST %s" % (f))
        new_file_object = {}
    else:
        r = requests.post(url,
                          auth=keypair,
                          headers={'content-type': 'application/json'},
                          data=json.dumps(f))
        try:
            r.raise_for_status()
            new_file_object = r.json()['@graph'][0]
            logger.info("New accession: %s" %
                        (new_file_object.get('accession')))
        except Exception:
            logger.warning('POST file object failed: %s %s' %
                           (r.status_code, r.reason))
            logger.debug(r.text)
            new_file_object = {}
            if r.status_code == 409:
                try:  #cautiously add a tag with the existing accession number
                    calculated_md5 = f.get('md5sum')  # computed above via common.md5
                    if calculated_md5 in r.json().get('detail'):
                        url = urlparse.urljoin(
                            server,
                            '/search/?type=file&md5sum=%s' % (calculated_md5))
                        r = requests.get(url, auth=keypair)
                        r.raise_for_status()
                        accessioned_file = r.json()['@graph'][0]
                        existing_accession = accessioned_file['accession']
                        dx.add_tags([existing_accession])
                        logger.info(
                            'Already accessioned.  Added %s to dxfile tags' %
                            (existing_accession))
                except Exception:
                    logger.info(
                        'Conflict does not appear to be md5 ... continuing')
        if new_file_object:
            creds = new_file_object['upload_credentials']
            env = os.environ.copy()
            env.update({
                'AWS_ACCESS_KEY_ID': creds['access_key'],
                'AWS_SECRET_ACCESS_KEY': creds['secret_key'],
                'AWS_SECURITY_TOKEN': creds['session_token'],
            })

            logger.info("Uploading file.")
            start = time.time()
            try:
                subprocess.check_call([
                    'aws', 's3', 'cp', local_fname, creds['upload_url'],
                    '--quiet'
                ],
                                      env=env)
            except subprocess.CalledProcessError as e:
                # The aws command returns a non-zero exit code on error.
                logger.error("Upload failed with exit code %d" % e.returncode)
                upload_returncode = e.returncode
            else:
                upload_returncode = 0
                end = time.time()
                duration = end - start
                logger.info("Uploaded in %.2f seconds" % duration)
                dx.add_tags([new_file_object.get('accession')])
        else:
            upload_returncode = -1

    try:
        os.remove(local_fname)
    except OSError:
        pass

    return common.encoded_get(
        urlparse.urljoin(server,
                         '/files/%s' % (new_file_object.get('accession'))),
        keypair)
Example #25
if __name__ == '__main__':
    conn = sqlite3.connect('files2.db')
    cursor = conn.cursor()
    cursor.execute('''
    SELECT path_ FROM files
    ''')

    for row in cursor.fetchall():
        absolute_path = row[0]
        if not os.path.exists(absolute_path):
            cursor.execute(
                '''
            DELETE FROM files WHERE path_=?
            ''', (absolute_path, ))
        else:
            type_ = magic.from_file(absolute_path, mime=True)
            md5_ = md5(absolute_path)
            size = os.path.getsize(absolute_path)
            cursor.execute(
                '''
                SELECT * FROM files WHERE path_=? AND (type_!=? OR md5!=? OR size!=?)
                ''', (absolute_path, type_, md5_, size))
            fetched = cursor.fetchone()
            if fetched:
                cursor.execute(
                    '''
                    UPDATE files SET type_=?, md5=?, size=?
                    WHERE path_=?
                    ''', (type_, md5_, size, absolute_path))
        conn.commit()
Example #26
def accession_file(f, keypair, server, dryrun, force):
	#check for duplication
	#download
	#calculate md5 and add to f.md5sum
	#post file and get accession, upload credentials
	#upload to S3
	#remove the local file (to save space)
	#return the ENCODEd file object
	already_accessioned = False
	dx = f.pop('dx')
	for tag in dx.tags:
		m = re.search(r'(ENCFF\d{3}\D{3})|(TSTFF\D{6})', tag)
		if m:
			logger.info('%s appears to contain ENCODE accession number in tag %s ... skipping' %(dx.get_id(),m.group(0)))
			already_accessioned = True
			break
	if already_accessioned and not force:
		return
	url = urlparse.urljoin(server, 'search/?type=file&submitted_file_name=%s&format=json&frame=object' %(f.get('submitted_file_name')))
	r = requests.get(url,auth=keypair)
	try:
		r.raise_for_status()
		if r.json()['@graph']:
			for duplicate_item in r.json()['@graph']:
				if duplicate_item.get('status')  == 'deleted':
					logger.info("A potential duplicate file was found but its status=deleted ... proceeding")
					duplicate_found = False
				else:
					logger.info("Found potential duplicate: %s" %(duplicate_item.get('accession')))
					submitted_file_size = dx.describe().get('size')
					if submitted_file_size ==  duplicate_item.get('file_size'):
						logger.info("%s %s: File sizes match, assuming duplicate." %(str(submitted_file_size), duplicate_item.get('file_size')))
						duplicate_found = True
						break
					else:
						logger.info("%s %s: File sizes differ, assuming new file." %(str(submitted_file_size), duplicate_item.get('file_size')))
						duplicate_found = False
		else:
			duplicate_found = False
	except Exception:
		logger.warning('Duplicate accession check failed: %s %s' % (r.status_code, r.reason))
		logger.debug(r.text)
		duplicate_found = False

	if duplicate_found:
		if force:
			logger.info("Duplicate detected, but force=true, so continuing")
		else:
			logger.info("Duplicate detected, skipping")
			return

	local_fname = dx.name
	logger.info("Downloading %s" %(local_fname))
	dxpy.download_dxfile(dx.get_id(),local_fname)
	f.update({'md5sum': common.md5(local_fname)})
	f['notes'] = json.dumps(f.get('notes'))

	url = urlparse.urljoin(server,'files/')
	if dryrun:
		logger.info("Dry run.  Would POST %s" %(f))
		new_file_object = {}
	else:
		r = requests.post(url, auth=keypair, headers={'content-type': 'application/json'}, data=json.dumps(f))
		try:
			r.raise_for_status()
			new_file_object = r.json()['@graph'][0]
			logger.info("New accession: %s" %(new_file_object.get('accession')))
		except Exception:
			logger.warning('POST file object failed: %s %s' % (r.status_code, r.reason))
			logger.warning(r.text)
			new_file_object = {}
			if r.status_code == 409:
				try: #cautiously add a tag with the existing accession number
					calculated_md5 = f.get('md5sum')  # computed above via common.md5
					if calculated_md5 in r.json().get('detail'):
						url = urlparse.urljoin(server,'/search/?type=file&md5sum=%s' %(calculated_md5))
						r = requests.get(url,auth=keypair)
						r.raise_for_status()
						accessioned_file = r.json()['@graph'][0]
						existing_accession = accessioned_file['accession']
						dx.add_tags([existing_accession])
						logger.info('Already accessioned.  Added %s to dxfile tags' %(existing_accession))
				except Exception:
					logger.info('Conflict does not appear to be md5 ... continuing')
		if new_file_object:
			creds = new_file_object['upload_credentials']
			env = os.environ.copy()
			env.update({
				'AWS_ACCESS_KEY_ID': creds['access_key'],
				'AWS_SECRET_ACCESS_KEY': creds['secret_key'],
				'AWS_SECURITY_TOKEN': creds['session_token'],
			})

			logger.info("Uploading file.")
			start = time.time()
			try:
				subprocess.check_call(['aws', 's3', 'cp', local_fname, creds['upload_url'], '--quiet'], env=env)
			except subprocess.CalledProcessError as e:
				# The aws command returns a non-zero exit code on error.
				logger.error("Upload failed with exit code %d" % e.returncode)
				upload_returncode = e.returncode
			else:
				upload_returncode = 0
				end = time.time()
				duration = end - start
				logger.info("Uploaded in %.2f seconds" % duration)
				dx.add_tags([new_file_object.get('accession')])
		else:
			upload_returncode = -1

	try:
		os.remove(local_fname)
	except OSError:
		pass

	return common.encoded_get(urlparse.urljoin(server, '/files/%s' %(new_file_object.get('accession'))), keypair)