Ejemplo n.º 1
0
    def main(self):
        """Copy the file locally, convert it to a gzip-compressed form if needed, and validate it.

        The type reported by ``get_file_type`` selects the conversion command
        ('bam', 'cram', 'bz2' or 'sff'); any other type goes straight to
        ``self.validate()``. A failure to detect the type is recorded in
        ``self.errors``; a conversion that reports anything on ``err`` is
        recorded in ``self.execution_error``. In both cases the method returns
        early without validating.

        NOTE(review): the handlers of the outer ``try`` are not visible in
        this excerpt, so what happens on other exceptions cannot be stated here.
        """
        self.copy_file()
        try:
            try:
                self.file_type = get_file_type(self.file_name)
            except Exception, e:
                # Type detection failed: record the message and stop.
                self.errors.append(str(e))
                return
            print "Starting " + self.name
            # BAM -> gzip-compressed FASTQ via samtools.
            if self.file_type == 'bam':
                self.working_file = self.file_name.replace('.bam', '.fastq.gz')
                cmd = 'samtools bam2fq %s | gzip > %s' % (
                    os.path.join(self.base_dir, self.file_name),
                    os.path.join(self.base_dir, self.working_file))

                out, err = execute_command(cmd)
                if err:
                    self.execution_error = err
                    return
            # Note: this is a separate ``if``, not an ``elif`` of the 'bam'
            # test above; it starts the cram / bz2 / sff chain.
            # CRAM -> gzip-compressed FASTQ via cramtools.
            if self.file_type == 'cram':
                self.working_file = self.file_name.replace(
                    '.cram', '.fastq.gz')
                cmd = 'cramtools  fastq -I %s | gzip > %s' % (
                    os.path.join(self.base_dir, self.file_name),
                    os.path.join(self.base_dir, self.working_file))

                out, err = execute_command(cmd)
                if err:
                    self.execution_error = err
                    return

            # bzip2 -> gzip re-compression.
            elif self.file_type == 'bz2':
                self.working_file = self.file_name.replace('.bz2', '.gz')
                cmd = 'bunzip2 -c < %s | gzip -c > %s' % (
                    os.path.join(self.base_dir, self.file_name),
                    os.path.join(self.base_dir, self.working_file))

                out, err = execute_command(cmd)
                if err:
                    self.execution_error = err
                    return

            # SFF -> gzip-compressed FASTQ via sff2fastq.
            elif self.file_type == 'sff':
                self.working_file = self.file_name.replace('.sff', '.fastq.gz')
                cmd = 'sff2fastq  %s | gzip -c > %s' % (
                    os.path.join(self.base_dir, self.file_name),
                    os.path.join(self.base_dir, self.working_file))

                out, err = execute_command(cmd)
                if err:
                    self.execution_error = err
                    return

            self.validate()
Ejemplo n.º 2
0
def start_servers():
    """Run the framework ``restart.sh`` of each ArrayExpress environment on ``banana``.

    The production, perftest and curator scripts are run in that order
    through ``ssh``. The stdout and stderr of every run are returned as a
    single newline-joined string (stdout first, then stderr, per environment).
    """
    command_template = (
        'ssh banana "source /etc/profile.d/lsf.sh;'
        ' /ebi/microarray/home/arrayexpress/%s/software/framework/restart.sh"')
    collected = []
    for environment in ('ae2_production', 'ae2_perftest', 'ae2_curator'):
        stdout_text, stderr_text = execute_command(
            command_template % environment)
        collected += [stdout_text, stderr_text]
    return '\n'.join(collected)
Ejemplo n.º 3
0
def download_soft_file(geo_acc, by='platform'):
    """Download the GEO family SOFT file for ``geo_acc`` with ``wget``.

    The file ``<geo_acc>_family.soft.gz`` is fetched from
    ``settings.GEO_SOFT_URL % by + geo_acc`` and stored in a directory named
    after the accession (``GPL`` replaced by ``A-GEOD-``) under
    ``settings.ADF_LOAD_DIR``. The directory is created when missing.

    :param geo_acc: GEO accession, e.g. a ``GPL...`` platform accession.
    :param by: value substituted into ``settings.GEO_SOFT_URL``.
    :return: local path of the downloaded file. The path is returned
        whatever ``wget`` reports; its output is only printed.
    """
    adf_tmp_dir = os.path.join(settings.ADF_LOAD_DIR,
                               geo_acc.replace('GPL', 'A-GEOD-'))
    # Create the directory without the exists()/mkdir() race; an already
    # existing directory is fine, any other failure is re-raised.
    try:
        os.makedirs(adf_tmp_dir)
    except OSError:
        if not os.path.isdir(adf_tmp_dir):
            raise

    file_name = geo_acc + '_family.soft.gz'
    local_path = os.path.join(adf_tmp_dir, file_name)
    url = settings.GEO_SOFT_URL % by + geo_acc + '/' + file_name
    print(url)

    command = """wget -m %s -O %s """ % (url, local_path)
    print(command)
    print(execute_command(command))
    return local_path
Ejemplo n.º 4
0
def main():
    """Remove loaded E-MTAB directories from /fire/staging and email a report.

    Lists ``/fire/staging/aexpress/*/`` over ssh. For every directory whose
    name starts with ``E-MTAB`` and whose accession is known to
    ``retrieve_study_id_by_acc``, runs ``rm -rf`` on it remotely. Removed
    directories are reported to ``CURATION_EMAIL``; failures are reported in
    a separate error email.
    """
    listing, _ = execute_command("""sudo -u fg_cur -H sh -c "ssh oy-ena-login-1 ls -d /fire/staging/aexpress/*/" """)
    removed = []
    not_removed = []
    for remote_path in listing.split('\n'):
        directory = remote_path.replace('/fire/staging/aexpress/', '').replace('/', '')
        if not directory.startswith('E-MTAB'):
            continue

        # The accession is the first three dash-separated parts of the name.
        acc = '-'.join(directory.split('-')[:3])
        print(acc)
        if not retrieve_study_id_by_acc(acc):
            continue

        _, rm_error = execute_command("""sudo -u fg_cur -H sh -c "ssh oy-ena-login-1 rm -rf %s" """ % remote_path)
        if rm_error:
            not_removed.append('%s:%s' % (directory, rm_error))
            print(rm_error)
        else:
            removed.append(directory)

    if removed:
        removed_body = """Dear Curator,
The directories below have been deleted from /fire/staging. These were found loaded in AE database.

Thanks,
AE Automation Tool.

REMOVED DIRECTORY LIST:
=======================
%s""" % '\n'.join(removed)
        send_email(from_email='AE Automation<*****@*****.**>',
                   to_emails=[CURATION_EMAIL],
                   subject='/fire/staging Cleaning Report',
                   body=removed_body)
    if not_removed:
        error_body = """
        ERROR DIRECTORY LIST:
        =======================
        %s""" % '\n'.join(not_removed)
        send_email(from_email='AE Automation<*****@*****.**>',
                   to_emails=['*****@*****.**'],
                   subject='/fire/staging Error Report',
                   body=error_body)
Ejemplo n.º 5
0
 def run(self):
     """Run ``self.command`` through ``execute_command`` and print the results.

     The command is printed before and after execution, followed by the
     captured stdout and stderr, each under its own heading.
     """
     print self.command
     out, err = execute_command(self.command)
     print self.command
     print 'Out:\n', out
     print '-' * 30
     print 'Error:\n', err
     print '----------------------'
Ejemplo n.º 6
0
 def status(self):
     """Query ``bjobs`` over ssh for ``self.job_id``.

     Returns ``'completed'`` when the command's stdout is a single line.
     Otherwise prints the first line and the space-separated fields of the
     second line, and returns ``None`` implicitly.
     """
     bjobs_cmd = 'ssh ebi-login-001  "source /etc/profile.d/lsf.sh; bjobs %s"' % self.job_id
     stdout_text, _ = execute_command(bjobs_cmd)
     rows = stdout_text.split('\n')
     if len(rows) == 1:
         return 'completed'
     header_row, job_row = rows[0], rows[1]
     print(header_row)
     # Consecutive spaces yield empty strings when splitting on ' '; drop them.
     print([field for field in job_row.split(' ') if field])
Ejemplo n.º 7
0
def get_jobs_ids():
    """Return the ids of LSF jobs whose ``bjobs -w`` line contains ``server``.

    The job id is taken as the first space-separated field of each output
    line. Lines whose first field is not an integer (for example the empty
    line after the final newline) are printed and skipped.

    :return: list of job ids as ``int``.
    """
    out, _ = execute_command('ssh banana "source /etc/profile.d/lsf.sh;bjobs -w | grep server"')
    ids = []
    for line in out.split('\n'):
        try:
            ids.append(int(line.split(' ')[0]))
        except ValueError:
            # Only a failed int() conversion is expected here; anything else
            # (including KeyboardInterrupt) must propagate.
            print(line)
    return ids
Ejemplo n.º 8
0
def tf_install(constants: dict, warn: bool = True,
               max_retries: int = 2) -> int:
    """Run through terraform installation

    Runs ``terraform init`` followed by three ``terraform apply`` stages
    (orc8r module, secrets seeding, full apply) inside
    ``constants['project_dir']``. After each of the two stages that bring up
    infrastructure, the single ``kubeconfig_*`` file found in the project
    directory is exported as ``KUBECONFIG``.

    Args:
        constants (dict): config dict
        warn (bool, optional): require user confirmation. Defaults to True.
        max_retries (int): Number of times to retry in case of a failure.

    Returns:
        int: 0 on success; 1 when a command keeps failing after
        ``max_retries`` attempts or when zero or several kubeconfig files
        are found.
    """

    tf_init = ["terraform", "init"]
    tf_orc8r = ["terraform", "apply", "-target=module.orc8r", "-auto-approve"]
    tf_secrets = [
        "terraform",
        "apply",
        "-target=module.orc8r-app.null_resource.orc8r_seed_secrets",
        "-auto-approve"]
    tf_orc8r_app = ["terraform", "apply", "-auto-approve"]

    for tf_cmd in [tf_init, tf_orc8r, tf_secrets, tf_orc8r_app]:
        cmd = " ".join(tf_cmd)
        if warn and not click.confirm(f'Do you want to continue with {cmd}?'):
            # The warning belongs to the declined step. It used to sit in a
            # ``for ... else`` clause, which fired after every complete run.
            print_warning_msg(f"Skipping Command {cmd}")
            continue

        for i in range(max_retries):
            # terraform fails randomly due to timeouts
            click.echo(f"Running {tf_cmd}, iteration {i}")
            rc = execute_command(tf_cmd, cwd=constants['project_dir'])
            if rc == 0:
                break
            print_error_msg(f"Install failed when running {cmd} !!!")
            if i == (max_retries - 1):
                print_error_msg("Max retries exceeded!!!")
                return 1

        # set the kubectl after bringing up the infra
        if tf_cmd in (tf_orc8r, tf_orc8r_app):
            kubeconfigs = glob.glob(
                constants['project_dir'] + "/kubeconfig_*")
            if len(kubeconfigs) != 1:
                print_error_msg(
                    "zero or multiple kubeconfigs found %s!!!" %
                    repr(kubeconfigs))
                # Honour the ``-> int`` contract: this is a failure.
                return 1
            kubeconfig = kubeconfigs[0]
            os.environ['KUBECONFIG'] = kubeconfig
            print_info_msg(
                'For accessing kubernetes cluster, set'
                f' `export KUBECONFIG={kubeconfig}`')

        print_success_msg(f"Command {cmd} ran successfully")
    return 0
Ejemplo n.º 9
0
def kill_jobs(job_ids):
    """Issue ``bkill -r`` for each job id, then block until no jobs remain.

    After the kill commands are sent, ``get_jobs_ids()`` is polled every
    10 seconds until it returns an empty list.

    :param job_ids: iterable of integer LSF job ids.
    """
    print('start kill')

    bkill_template = 'ssh banana "source /etc/profile.d/lsf.sh;bkill -r %d"'
    for job_id in job_ids:
        _stdout, _stderr = execute_command(bkill_template % job_id)

    while get_jobs_ids():
        time.sleep(10)
Ejemplo n.º 10
0
def run_playbook(play: AnsiblePlay) -> int:
    """Run ansible playbook

    When ``play.inventory`` is set, the playbook is run through the
    ``ansible-playbook`` executable. Otherwise it is run in-process with
    ``PlaybookExecutor`` and an ``InventoryManager`` created without sources.

    Args:
        play (AnsiblePlay): object describing the current play

    Returns:
        int: return code
    """
    if play.inventory:
        argv = [
            "ansible-playbook",
            "-i", play.inventory,
            "-e", json.dumps(play.extra_vars),
            "--tags", ",".join(play.tags),
            play.playbook,
        ]
        return execute_command(
            argv, env={"ANSIBLE_HOST_KEY_CHECKING": "False"},
        )

    # In-process run: the options normally given on the command line are
    # installed on ansible's global context.
    cli_args = {
        'tags': play.tags,
        'skip_tags': play.skip_tags,
        'connection': 'smart',
        'verbosity': play.verbosity,
        'forks': 10,
        'become': None,
        'become_method': None,
        'become_user': None,
        'check': False,
        'syntax': None,
        'start_at_task': None,
        'diff': False,
    }
    context.CLIARGS = ImmutableDict(**cli_args)

    loader = DataLoader()
    variable_manager = VariableManager(loader=loader)
    variable_manager.extra_vars.update(play.extra_vars)
    inventory = InventoryManager(loader=loader)
    variable_manager.set_inventory(inventory)

    executor = PlaybookExecutor(
        playbooks=[play.playbook],
        inventory=inventory,
        variable_manager=variable_manager,
        loader=loader,
        passwords={},
    )
    return executor.run()
Ejemplo n.º 11
0
    def copy_file(self):
        if settings.LOCAL_EXECUTION:
            cmd = 'cp %s/%s %s' % (self.ena_dir, self.file_name, self.base_dir)
        else:
            cmd = 'scp -oStrictHostKeyChecking=no  oy-ena-login-1:%s/%s %s' % (
                self.ena_dir, self.file_name, self.base_dir)

        print cmd
        out, err = execute_command(cmd)
        print out, err
        # out, err = execute_command('cp  -r %s %s' % (ena_dir, local_dir))
        # print out, err
        # exit()
        return out, err
Ejemplo n.º 12
0
def copy_files(ena_dir, local_dir):
    """Copy the study's ``*.txt`` files from the ENA machine to the local cluster.

    ``local_dir`` is created when it does not exist. The ``scp`` command line
    is always printed, even when ``LOCAL_EXECUTION`` makes a local ``cp`` run
    instead.

    :param ena_dir: The directory containing data files to be validated and also the SDRF file.
        This directory should be in `/fire/staging/aexpress/`
    :type ena_dir: str
    :param local_dir: The temp directory created on the local shared storage. This is removed after the validation ended.
    :type local_dir: str
    :return: std_out and std_err of the copy command.
    :rtype: :obj:`tuple` of :obj:`str`
    """
    if not os.path.exists(local_dir):
        print('creating %s' % local_dir)
        os.mkdir(local_dir)

    remote_copy = 'scp -oStrictHostKeyChecking=no  sra-login-1:%s/*.txt %s' % (
        ena_dir, local_dir)
    print(remote_copy)

    if LOCAL_EXECUTION:
        chosen = 'cp  %s/*.txt %s' % (ena_dir, local_dir)
    else:
        chosen = remote_copy
    out, err = execute_command(chosen)
    print('executed')
    return out, err
Ejemplo n.º 13
0
def import_geo_platform(geo_acc):
    """Import a GEO platform as an ADF and queue its load pipeline.

    Downloads and parses the SOFT file, generates the ADF, runs
    ``magetab_insert_array.pl`` and ``reset_array.pl``, then submits the
    ``load_adf`` Conan task. If the reset output mentions "error", an email
    with that output is sent and the task is not submitted. Any exception is
    caught and reported by email instead of being raised.

    :param geo_acc: GEO platform accession (``GPL...``).
    """
    try:
        soft_file = download_soft_file(geo_acc)
        header, table = parse_soft_file(soft_file)
        generate_adf(geo_acc, header, table)

        array_acc = geo_acc.replace('GPL', 'A-GEOD-')
        adf_file = os.path.join(settings.ADF_LOAD_DIR, array_acc,
                                geo_acc + '.adf.txt')
        insert_cmd = 'magetab_insert_array.pl -f %s -a %s -c' % (
            adf_file, array_acc)
        print(execute_command(insert_cmd))

        reset_cmd = 'reset_array.pl -a A-GEOD-%s -c' % geo_acc.replace(
            'GPL', '')
        out, err = execute_command(reset_cmd)
        if any('error' in stream.lower() for stream in (out, err)):
            msg = """Dear Curators,
While trying to execute rest_array.pl for %s the we had the following output:
%s
%s""" % (geo_acc, out, err)
            send_email(from_email='AE Automation<*****@*****.**>',
                       to_emails=['*****@*****.**', '*****@*****.**'],
                       subject='GEO Array Error ' + array_acc,
                       body=msg)
            return
        submit_conan_task(accession=array_acc,
                          pipeline_name=CONAN_PIPELINES.load_adf)

    except Exception as e:
        msg = """The following error occurred while importing: %s
%s""" % (geo_acc, str(e))
        send_email(from_email='AE Automation<*****@*****.**>',
                   to_emails=['*****@*****.**'],
                   subject='Platform imported',
                   body=msg)
Ejemplo n.º 14
0
def validate_data_files_view(request, job_id=''):
    """Start (POST) or report on (GET) a data-file validation job.

    POST: reads ``id`` and ``data_dir`` from the form data, creates the
    ``Validate`` row for that job id (or resets the existing one to an empty
    report with status ``'P'``), submits ``fastq_validators.py`` through
    ``bsub`` over ssh and answers with an empty 200 response.

    GET: returns the stored report for ``job_id`` as a JSON string, with a
    ``status`` label added and ``execution_errors`` removed. Answers 400
    when ``job_id`` is empty and 404 when no row exists.

    Any other method is answered with 405.

    :param request: the Django request.
    :param job_id: job identifier, used by GET only.
    :return: ``HttpResponse``
    """
    if request.method == "POST":
        req_id = request.POST.get('id')
        data_dir = request.POST.get('data_dir')
        v = Validate.objects.filter(job_id=req_id)
        report = {
            'file_errors': {},
            'pairs_errors': [],
            'valid_files': [],
            'execution_errors': [],
            'integrity_errors': []
        }
        if v:
            v = v[0]
            v.data_dir = data_dir
            v.validation_report = json.dumps(report)
            v.status = 'P'
        else:
            v = Validate(job_id=str(req_id),
                         data_dir=data_dir,
                         validation_report=json.dumps(report))
        v.save()
        py_file = os.path.abspath(
            os.path.join(os.path.dirname(__file__), '..', '..', 'utils',
                         'validators', 'fastq_validators.py'))
        # NOTE(review): req_id and data_dir come straight from the POST body
        # and are interpolated into a shell command run over ssh. They should
        # be validated or escaped before use; the nested quoting here makes a
        # drop-in quote() insufficient, so this is flagged, not changed.
        cmd = """ssh ebi-login-001 'source /etc/profile.d/lsf.sh;bsub -u ahmed -q production-rh7 "source /nfs/production3/ma/home/arrayexpress/ae_automation/resources-rh7/bashrc;which python; python %s %s %s"' """ % (
            py_file, req_id, data_dir)
        print(cmd)
        out, err = execute_command(cmd)
        print(out)
        print('=' * 30)
        print(err)
        # The second positional argument of HttpResponse is the content type,
        # not the status code, so the status must be passed by keyword.
        return HttpResponse(status=200)

    if request.method == "GET":
        if not job_id:
            return HttpResponse('Bad Request', status=400)
        v = Validate.objects.filter(job_id=job_id)
        if not v:
            return HttpResponse('Not Found', status=404)
        report = json.loads(v[0].validation_report)
        report['status'] = [i[1] for i in STATUS if i[0] == v[0].status][0]
        # Internal details are not exposed; tolerate reports without the key.
        report.pop('execution_errors', None)
        return HttpResponse(json.dumps(report), status=200)

    # A view must not return None; reject every other HTTP method explicitly.
    return HttpResponse('Method Not Allowed', status=405)
Ejemplo n.º 15
0
def tf_destroy(
    constants: dict, warn: bool = True,
    max_retries: int = 2,
) -> int:
    """Run through terraform cleanup

    Backs up the terraform state of ``constants['project_dir']`` and then
    runs ``terraform destroy -auto-approve`` there, up to ``max_retries``
    times.

    Args:
        constants (dict): Config definitions
        warn (bool): require user confirmation. Defaults to True.
        max_retries (int): Number of times to retry in case of a failure.
    Returns:
        int: 0 when destroy succeeded or was not attempted; 1 when the
        state backup failed or every attempt failed.
    """
    if warn:
        proceed = click.confirm(
            'Do you want to continue with cleanup?', abort=True,
        )
        if not proceed:
            return 0

    project_dir = constants['project_dir']

    # Keep a copy of the current state before destroying anything.
    try:
        copyfile(tf_state_fn(project_dir), tf_backup_fn(project_dir))
    except OSError:
        print_error_msg('Unable to backup terraform state')
        return 1

    destroy_argv = ["terraform", "destroy", "-auto-approve"]
    printable = " ".join(destroy_argv)
    final_attempt = max_retries - 1
    for attempt in range(max_retries):
        click.echo(f"Running {printable}, iteration {attempt}")
        if execute_command(destroy_argv, cwd=project_dir) == 0:
            return 0
        print_error_msg("Destroy Failed!!!")
        if attempt == final_attempt:
            print_error_msg(
                "Max retries exceeded!!! Attempt cleaning up using"
                " 'orcl cleanup raw' subcommand",
            )
            return 1
    return 0
Ejemplo n.º 16
0
def upgrade(ctx):
    """
    Upgrade existing orc8r deployment
    """
    # NOTE(review): the docstring above is left untouched because it is
    # presumably shown as click help text -- confirm against the decorators.

    # Nothing to do here when a subcommand was invoked.
    if ctx.invoked_subcommand is not None:
        return

    steps = [
        ["terraform", "init", "--upgrade"],
        ["terraform", "refresh"],
        ["terraform", "apply", "-auto-approve"],
    ]

    if click.confirm('Do you want to run upgrade prechecks?'):
        ctx.invoke(precheck)
    else:
        print_warning_msg("Skipping upgrade prechecks")

    plan = "\n".join(" ".join(step) for step in steps)
    click.echo("Following commands will be run during upgrade\n%s" % plan)

    # Each step needs its own confirmation; a declined step is skipped and
    # the first failing step stops the upgrade.
    for step in steps:
        if not click.confirm(
                'Do you want to continue with %s?' % " ".join(step)):
            continue
        if execute_command(step) != 0:
            print_error_msg("Upgrade Failed!!!")
            return
Ejemplo n.º 17
0
def _split_paired_fastq_rows(lines):
    """Rebuild combined IDF/SDRF ``lines`` with paired run files renamed.

    Lines up to and including the ``[SDRF]`` marker and the
    ``Source Name`` header row are passed through (stripped). SDRF rows are
    grouped by their ``Comment[ENA_RUN]`` column; for a run with several
    rows the first two get ``.fastq.gz`` replaced by ``_1.fastq.gz`` and
    ``_2.fastq.gz`` respectively.

    :return: ``(write_lines, changed, damaged)``. ``damaged`` is True when a
        multi-row run already carries ``_1``/``_2`` file names; in that case
        ``write_lines`` is incomplete and must not be written.
    """
    write_lines = []
    rows_by_run = collections.OrderedDict()
    in_sdrf = False
    run_index = -1
    for line in lines:
        stripped = line.strip()
        if not in_sdrf:
            if stripped == '[SDRF]':
                in_sdrf = True
            # The marker is emitted here only; it must not also fall through
            # to the row grouping below, which duplicated it in the output.
            write_lines.append(stripped)
            continue
        if line.startswith('Source Name'):
            write_lines.append(stripped)
            run_index = stripped.split('\t').index('Comment[ENA_RUN]')
            continue
        if not stripped:
            # Blank lines have no run column; indexing them raised IndexError.
            continue
        run = line.split('\t')[run_index]
        rows_by_run.setdefault(run, []).append(stripped)

    changed = False
    for rows in rows_by_run.values():
        if len(rows) > 1:
            joined = ' '.join(rows)
            if '_1.fastq.gz' in joined or '_2.fastq.gz' in joined:
                return write_lines, False, True
            changed = True
            rows[0] = rows[0].replace('.fastq.gz', '_1.fastq.gz')
            rows[1] = rows[1].replace('.fastq.gz', '_2.fastq.gz')
        write_lines += rows
    return write_lines, changed, False


def main():
    """Repair paired-end file names in the IDF files of known-corrupted experiments.

    For every accession in the hard-coded ``corrupted`` list, the
    experiment's combined IDF/SDRF file is located under ``ANNOTARE_DIR`` and
    its paired rows are renamed. When something changed, the file is
    rewritten, ``reset_experiment.pl`` is run and, if that reports no error,
    the experiment is re-exported and reloaded. Experiments flagged as
    damaged are reported and left untouched.
    """
    # The list was produced by an earlier detection pass that is no longer
    # part of this script.
    corrupted = [
        'E-MTAB-3800', 'E-MTAB-3964', 'E-MTAB-4002', 'E-MTAB-4069',
        'E-MTAB-4082', 'E-MTAB-4096', 'E-MTAB-4159', 'E-MTAB-4549',
        'E-MTAB-4694', 'E-MTAB-4686', 'E-MTAB-4723', 'E-MTAB-4846',
        'E-MTAB-4264', 'E-MTAB-5044', 'E-MTAB-5169', 'E-MTAB-5362'
    ]
    print(corrupted)

    for exp in corrupted:
        exp_sub_tracking_id = retrieve_experiment_id_by_accession(exp)
        submission = 'MAGE-TAB_' + str(exp_sub_tracking_id)
        print(submission)

        if not exp_sub_tracking_id:
            print("%s doesn't exist in subs tracking" % exp)
            continue
        mage_tab = os.path.join(ANNOTARE_DIR, submission)
        try:
            idf_file = extract_idf_file_name(mage_tab, exp)
        except Exception as e:
            print(e)
            continue
        print(idf_file)

        with open(idf_file, 'r') as f:
            lines = f.readlines()

        write_lines, changed, damaged = _split_paired_fastq_rows(lines)
        if damaged:
            # Never write a partially rebuilt file back to disk.
            print(colored.red('DAMAGED: ' + submission))
            continue
        if not changed:
            continue

        with open(idf_file, 'w') as f:
            f.write('\n'.join(write_lines))
        out, err = execute_command('reset_experiment.pl -c ' + submission)
        print('out: %s' % (out,))
        print(colored.red('error: ' + str(err)))
        if not err:
            wait_for_ae_export(exp, submission)
            reload_experiment(exp)