def analyze_construct(self, filenames=None):
        """Extract construct annotations from PDB files and dump them as JSON.

        For every file whose name ends in ``pdb`` or ``ent`` the PDB header is
        read with ``parse_pdb_header`` and the results of the
        ``SequenceParser`` ``get_fusions``/``get_mutations``/``get_deletions``
        calls are merged into one ordered mapping, which is written to
        ``<settings.DATA_DIR>/<root>_auto.json``.

        Args:
            filenames: File names located inside ``self.construct_data_dir``.
                When empty or ``None`` the whole directory is listed instead.
        """
        self.logger.info("ANALYZING CONSTRUCT STRUCTURES")

        # read source files
        if not filenames:
            filenames = os.listdir(self.construct_data_dir)

        for filename in filenames:
            # Anything that does not end in "pdb"/"ent" is not a structure file.
            if filename[-3:] not in ('pdb', 'ent'):
                continue
            root = os.path.splitext(os.path.basename(filename))[0]
            print(filename)
            print(root)
            filepath = os.sep.join([self.construct_data_dir, filename])
            self.logger.info("Working on a file: {}".format(filename))
            header = parse_pdb_header(filepath)
            parser = SequenceParser(filepath)

            json_data = OrderedDict()
            json_data["header"] = header
            json_data.update(parser.get_fusions())
            json_data.update(parser.get_mutations())
            json_data.update(parser.get_deletions())

            out_path = os.sep.join(
                [settings.DATA_DIR, "{}_auto.json".format(root)])
            # The context manager closes (and flushes) the handle even when
            # serialisation fails; the bare open() used before leaked one
            # descriptor per processed file.
            with open(out_path, 'w') as out_fh:
                json.dump(json_data, out_fh, indent=4, separators=(',', ': '))
    def analyze_construct(self, filenames=None):
        """Write a ``*_auto.json`` annotation file for each construct PDB.

        Each accepted file (name ending in ``pdb`` or ``ent``) is passed to
        ``parse_pdb_header`` and ``SequenceParser``; the header plus the
        parser's fusions, mutations and deletions are collected in an
        ``OrderedDict`` and serialised to
        ``<settings.DATA_DIR>/<root>_auto.json``.

        Args:
            filenames: Names of files in ``self.construct_data_dir`` to
                analyse. A falsy value means "every file in that directory".
        """
        self.logger.info("ANALYZING CONSTRUCT STRUCTURES")

        # read source files
        if not filenames:
            filenames = os.listdir(self.construct_data_dir)

        for filename in filenames:
            # Skip everything that is not a *pdb / *ent structure file.
            if filename[-3:] not in ('pdb', 'ent'):
                continue
            root = os.path.splitext(os.path.basename(filename))[0]
            print(filename)
            print(root)
            filepath = os.sep.join([self.construct_data_dir, filename])
            self.logger.info("Working on a file: {}".format(filename))
            header = parse_pdb_header(filepath)
            parser = SequenceParser(filepath)

            json_data = OrderedDict()
            json_data["header"] = header
            json_data.update(parser.get_fusions())
            json_data.update(parser.get_mutations())
            json_data.update(parser.get_deletions())

            target = os.sep.join(
                [settings.DATA_DIR, "{}_auto.json".format(root)])
            # Previously the handle returned by open() was never closed;
            # "with" releases it deterministically, also on errors.
            with open(target, 'w') as json_fh:
                json.dump(json_data,
                          json_fh,
                          indent=4,
                          separators=(',', ': '))
Esempio n. 3
0
    def post(self, request):
        """Return construct annotations for an uploaded PDB file.

        The upload is read from ``request._request.FILES['pdb_file']``,
        decoded as UTF-8 (undecodable bytes are ignored) and wrapped in a
        ``StringIO``. The response body holds the parsed header under
        ``"header"`` followed by whatever the parser's ``get_fusions``,
        ``get_mutations`` and ``get_deletions`` calls return.
        """
        upload = request._request.FILES['pdb_file']
        pdb_text = upload.file.read().decode('UTF-8', "ignore")
        pdb_stream = StringIO(pdb_text)

        # NOTE(review): the same stream object is handed to both consumers
        # without being rewound in between - confirm SequenceParser copes
        # with the position parse_pdb_header leaves behind.
        header = parse_pdb_header(pdb_stream)
        parser = SequenceParser(pdb_stream)

        payload = OrderedDict()
        payload["header"] = header
        for collect in (parser.get_fusions,
                        parser.get_mutations,
                        parser.get_deletions):
            payload.update(collect())

        return Response(payload)
Esempio n. 4
0
 def handle(self, *args, **options):
     """Map the peptides of one chain of a PDB file and save a report.

     The file path is taken from ``options['pdb_file']``. After printing the
     header's ``compound`` entry, the first key of the parser's ``mapping``
     is used to fetch peptides via ``get_chain_peptides``; each peptide is
     reported on stdout and passed to ``map_to_wt_blast``. Finally
     ``map_seqres`` is called and the report is saved as ``test.xlsx``.
     """
     pdb_path = options['pdb_file']
     print("Working on file {}".format(pdb_path))
     pdb_header = parse_pdb_header(pdb_path)
     print(pdb_header['compound'])

     seq_parser = SequenceParser(pdb_path)
     chain_key = list(seq_parser.mapping.keys())[0]
     for segment in seq_parser.get_chain_peptides(chain_key):
         first_number = segment[0].id[1]
         last_number = segment[-1].id[1]
         print("Start: {} Stop: {} Len: {}".format(
             first_number, last_number, len(segment)))
         seq_parser.map_to_wt_blast(
             chain_key, segment, None, int(first_number))

     seq_parser.map_seqres()
     seq_parser.save_excel_report("test.xlsx")
Esempio n. 5
0
    def handle(self, *args, **options):
        """Create YAML files for every entry in ``QueryPDB.new_structures``.

        For each record (``record[0]`` is the PDB code, ``record[1]`` the
        protein entry name) the PDB file is downloaded when it is not yet
        present in ``self.pdb_data_dir``, a ``SequenceParser`` is stored on
        ``self.parser`` and ``create_yaml`` is called with the parsed header.
        """
        query = QueryPDB()
        query.list_xtals(verbose=False)

        for record in query.new_structures:
            pdb_code, entry_name = record[0], record[1]
            wt_id = Protein.objects.get(entry_name=entry_name).id

            # One path serves the existence check, the parser and the header.
            pdb_path = os.sep.join(
                [self.pdb_data_dir, "{}.pdb".format(pdb_code)])
            if not os.path.exists(pdb_path):
                self.download_pdb(pdb_code)

            self.parser = SequenceParser(pdb_path, wt_protein_id=wt_id)
            self.create_yaml(pdb_code, entry_name, parse_pdb_header(pdb_path))
    def handle(self, *args, **options):
        """Generate YAML build files for structures listed by ``QueryPDB``.

        Iterates ``new_structures`` after ``list_xtals(verbose=False)``; for
        every record the wild-type protein id is looked up by entry name, the
        PDB file is fetched if it is missing locally, and the header together
        with the freshly created ``self.parser`` feeds ``create_yaml``.
        """
        pdb_query = QueryPDB()
        pdb_query.list_xtals(verbose=False)

        for entry in pdb_query.new_structures:
            code = entry[0]
            protein_name = entry[1]
            wild_type_id = Protein.objects.get(entry_name=protein_name).id

            local_pdb = os.sep.join([self.pdb_data_dir, "{}.pdb".format(code)])
            already_present = os.path.exists(local_pdb)
            if not already_present:
                self.download_pdb(code)

            self.parser = SequenceParser(local_pdb, wt_protein_id=wild_type_id)
            pdb_header = parse_pdb_header(local_pdb)
            self.create_yaml(code, protein_name, pdb_header)
Esempio n. 7
0
 def handle(self, *args, **options):
     """Print and store the construct annotations of a single PDB file.

     The path comes from ``options['pdb_file']``. Fusions, mutations and
     deletions reported by ``SequenceParser`` are printed, then written
     together with the parsed header to
     ``<settings.DATA_DIR>/<root>_auto.json`` where ``root`` is the file
     name without its extension.
     """
     pdb_file = options['pdb_file']
     root = os.path.splitext(os.path.basename(pdb_file))[0]
     print("Working on file {}".format(pdb_file))
     header = parse_pdb_header(pdb_file)
     sp = SequenceParser(pdb_file)
     # NOTE(review): every getter runs twice (once to print, once to store);
     # left as is because it is not visible here whether they are pure.
     print(sp.get_fusions())
     print(sp.get_mutations())
     print(sp.get_deletions())
     json_data = {}
     json_data["header"] = header
     json_data.update(sp.get_fusions())
     json_data.update(sp.get_mutations())
     json_data.update(sp.get_deletions())

     out_path = os.sep.join([settings.DATA_DIR, "{}_auto.json".format(root)])
     # Close the output deterministically; the inline open() used before
     # left the handle to the garbage collector.
     with open(out_path, 'w') as out_fh:
         json.dump(json_data, out_fh, indent=4, separators=(',', ': '))
Esempio n. 8
0
 def handle(self, *args, **options):
     """Dump header, fusions, mutations and deletions of a PDB file to JSON.

     Reads ``options['pdb_file']``, prints the three annotation groups
     returned by ``SequenceParser`` and serialises them, with the parsed
     header stored under ``"header"``, to
     ``<settings.DATA_DIR>/<root>_auto.json``.
     """
     pdb_file = options['pdb_file']
     root = os.path.splitext(os.path.basename(pdb_file))[0]
     print("Working on file {}".format(pdb_file))
     header = parse_pdb_header(pdb_file)
     sp = SequenceParser(pdb_file)
     # NOTE(review): the getters are invoked once for printing and once for
     # storing; kept because their purity cannot be confirmed from here.
     print(sp.get_fusions())
     print(sp.get_mutations())
     print(sp.get_deletions())
     json_data = {}
     json_data["header"] = header
     json_data.update(sp.get_fusions())
     json_data.update(sp.get_mutations())
     json_data.update(sp.get_deletions())

     target = os.sep.join([settings.DATA_DIR, "{}_auto.json".format(root)])
     # "with" guarantees the file is flushed and closed, also when
     # json.dump raises; the former bare open() leaked the handle.
     with open(target, 'w') as json_fh:
         json.dump(
             json_data,
             json_fh,
             indent=4,
             separators=(',', ': '))
class Command(BaseCommand):
    """Prepare YAML build files for structures reported by ``QueryPDB``.

    ``handle`` walks over ``QueryPDB.new_structures``, makes sure the PDB
    file exists locally, parses it and delegates to ``create_yaml``, which
    writes one structure file and one construct file per PDB code.
    """

    logger = logging.getLogger(__name__)

    # source file directory
    structure_data_dir = os.sep.join([settings.DATA_DIR, 'structure_data', 'structures'])
    structure_build_data_dir = os.sep.join([settings.DATA_DIR, 'structure_data'])
    pdb_data_dir = os.sep.join([settings.DATA_DIR, 'structure_data', 'pdbs'])

    def _pdb_path(self, pdb_code):
        """Return the local path of the PDB file belonging to *pdb_code*."""
        return os.sep.join([self.pdb_data_dir, "{}.pdb".format(pdb_code)])

    def handle(self, *args, **options):
        """Process every record in ``QueryPDB.new_structures``.

        ``record[0]`` is used as the PDB code and ``record[1]`` as the
        protein entry name. The parser is stored on ``self.parser`` because
        ``create_yaml`` reads it from there.
        """
        q = QueryPDB()
        q.list_xtals(verbose=False)

        for record in q.new_structures:
            pdb_code = record[0]
            wt_id = Protein.objects.get(entry_name=record[1]).id
            pdb_path = self._pdb_path(pdb_code)
            if not os.path.exists(pdb_path):
                self.download_pdb(pdb_code)
            self.parser = SequenceParser(pdb_path, wt_protein_id=wt_id)
            header = parse_pdb_header(pdb_path)
            self.create_yaml(pdb_code, record[1], header)

    def download_pdb(self, pdb_code):
        """Download the PDB file for *pdb_code* into ``pdb_data_dir``."""
        # HTTPS instead of plain HTTP, so the file is not fetched in clear
        # text.
        url = "https://www.rcsb.org/pdb/files/{}.pdb".format(pdb_code)
        urllib.request.urlretrieve(url, self._pdb_path(pdb_code))

    def create_yaml(self, pdb_code, prot_name, data):
        """Write the structure and construct YAML files for one PDB entry.

        Args:
            pdb_code: PDB code; used for the file names and the ``pdb`` /
                ``construct`` / ``name`` fields.
            prot_name: Protein entry name, stored as ``protein`` and used to
                look up segment boundaries.
            data: Parsed PDB header; ``resolution`` and ``release_date`` are
                read from it.

        Requires ``self.parser`` to be set (done by ``handle``).
        """
        yaml_pdb_data = {
            'pdb': pdb_code,
            'resolution': data['resolution'],
            # Only the first ten characters of the release date are kept.
            'publication_date': data['release_date'][:10],
            # PDB header contains full citation
            # 'pubmed_id' : data['journal_reference'],
        }
        yaml_struct_annotations = {
            'fusion_proteins': {
                index: [fusion[0], fusion[1]]
                for index, fusion in enumerate(self.parser.fusions)
            },
        }
        yaml_other_data = {
            'construct': pdb_code.lower(),
            'segments': self.get_segments_data(prot_name),
            'segments_in_structure': self.parser.get_segments(),
        }

        structure_path = '{}.yaml'.format(
            os.sep.join([self.structure_build_data_dir, 'structures', pdb_code]))
        # Context managers close the files even if yaml.dump raises.
        with open(structure_path, 'w') as out_fh:
            out_fh.write('# PDB data\n\n')
            yaml.dump(yaml_pdb_data, out_fh, default_flow_style=False)
            out_fh.write('\n# Structure annotations\n\n')
            yaml.dump(yaml_struct_annotations, out_fh)
            # NOTE(review): this heading repeats the previous one although
            # the section holds construct/segment data - confirm the wording.
            out_fh.write('\n# Structure annotations\n\n')
            yaml.dump(yaml_other_data, out_fh)

        yaml_construct = {
            'name': pdb_code.lower(),
            'protein': prot_name,
        }

        # The "auto_" prefix belongs to the file name. It used to be put in
        # front of the whole joined path ("auto_<dir>/constructs/<code>.yaml"),
        # which points outside the constructs directory.
        construct_path = os.sep.join([
            self.structure_build_data_dir,
            'constructs',
            'auto_{}.yaml'.format(pdb_code),
        ])
        with open(construct_path, 'w') as construct_fh:
            yaml.dump(yaml_construct, construct_fh, indent=4)

    def get_segments_data(self, prot_entry_name):
        """Return ``{segment slug: [first, last]}`` sequence numbers.

        Segments without residues for *prot_entry_name* are left out of the
        result.
        """
        output = {}
        for segment in ProteinSegment.objects.all():
            resi = list(Residue.objects.filter(
                protein_segment=segment,
                protein_conformation__protein__entry_name=prot_entry_name,
            ).order_by('sequence_number'))
            try:
                if resi:
                    output[segment.slug] = [resi[0].sequence_number, resi[-1].sequence_number]
            except Exception:
                # Best-effort placeholder kept from the original behaviour.
                output[segment.slug] = ['-,-']
        return output
Esempio n. 10
0
class Command(BaseCommand):
    """Generate structure and construct YAML files for new PDB entries.

    The entries come from ``QueryPDB.new_structures``; missing PDB files are
    downloaded into ``pdb_data_dir`` before they are parsed.
    """

    logger = logging.getLogger(__name__)

    # source file directory
    structure_data_dir = os.sep.join(
        [settings.DATA_DIR, 'structure_data', 'structures'])
    structure_build_data_dir = os.sep.join(
        [settings.DATA_DIR, 'structure_data'])
    pdb_data_dir = os.sep.join([settings.DATA_DIR, 'structure_data', 'pdbs'])

    def _pdb_path(self, pdb_code):
        """Return where the PDB file for *pdb_code* is stored locally."""
        return os.sep.join([self.pdb_data_dir, "{}.pdb".format(pdb_code)])

    def handle(self, *args, **options):
        """Create YAML files for each record in ``QueryPDB.new_structures``.

        A record supplies the PDB code (index 0) and the protein entry name
        (index 1). ``self.parser`` is assigned here and consumed by
        ``create_yaml``.
        """
        q = QueryPDB()
        q.list_xtals(verbose=False)

        for record in q.new_structures:
            pdb_code = record[0]
            wt_id = Protein.objects.get(entry_name=record[1]).id
            pdb_path = self._pdb_path(pdb_code)
            if not os.path.exists(pdb_path):
                self.download_pdb(pdb_code)
            self.parser = SequenceParser(pdb_path, wt_protein_id=wt_id)
            header = parse_pdb_header(pdb_path)
            self.create_yaml(pdb_code, record[1], header)

    def download_pdb(self, pdb_code):
        """Fetch the PDB file for *pdb_code* and store it in ``pdb_data_dir``."""
        url = "https://www.rcsb.org/pdb/files/{}.pdb".format(pdb_code)
        urllib.request.urlretrieve(url, self._pdb_path(pdb_code))

    def create_yaml(self, pdb_code, prot_name, data):
        """Write ``structures/<code>.yaml`` and ``constructs/auto_<code>.yaml``.

        Args:
            pdb_code: PDB code of the structure.
            prot_name: Protein entry name the structure belongs to.
            data: Parsed PDB header providing ``resolution`` and
                ``release_date``.

        ``self.parser`` must have been set by ``handle`` beforehand.
        """
        yaml_pdb_data = {
            'pdb': pdb_code,
            'resolution': data['resolution'],
            # Keep only the first ten characters of the release date.
            'publication_date': data['release_date'][:10],
            # PDB header contains full citation
            # 'pubmed_id' : data['journal_reference'],
        }
        yaml_struct_annotations = {
            'fusion_proteins': {
                position: [fusion[0], fusion[1]]
                for position, fusion in enumerate(self.parser.fusions)
            }
        }
        yaml_other_data = {
            'construct': pdb_code.lower(),
            'segments': self.get_segments_data(prot_name),
            'segments_in_structure': self.parser.get_segments(),
        }

        structure_path = '{}.yaml'.format(
            os.sep.join(
                [self.structure_build_data_dir, 'structures', pdb_code]))
        # "with" releases the handle even when one of the dumps fails.
        with open(structure_path, 'w') as out_fh:
            out_fh.write('# PDB data\n\n')
            yaml.dump(yaml_pdb_data, out_fh, default_flow_style=False)
            out_fh.write('\n# Structure annotations\n\n')
            yaml.dump(yaml_struct_annotations, out_fh)
            # NOTE(review): same heading as above although this section
            # carries construct/segment data - confirm the intended title.
            out_fh.write('\n# Structure annotations\n\n')
            yaml.dump(yaml_other_data, out_fh)

        yaml_construct = {
            'name': pdb_code.lower(),
            'protein': prot_name,
        }

        # Prefix the file name, not the joined path: formatting the whole
        # path into 'auto_{}.yaml' produced "auto_<dir>/constructs/<code>.yaml".
        construct_path = os.sep.join([
            self.structure_build_data_dir,
            'constructs',
            'auto_{}.yaml'.format(pdb_code),
        ])
        with open(construct_path, 'w') as construct_fh:
            yaml.dump(yaml_construct, construct_fh, indent=4)

    def get_segments_data(self, prot_entry_name):
        """Map each segment slug to its first and last sequence number.

        Only segments that have residues for *prot_entry_name* appear in the
        returned dictionary.
        """
        output = {}
        for segment in ProteinSegment.objects.all():
            resi = list(
                Residue.objects.filter(
                    protein_segment=segment,
                    protein_conformation__protein__entry_name=prot_entry_name,
                ).order_by('sequence_number'))
            try:
                if resi:
                    output[segment.slug] = [
                        resi[0].sequence_number, resi[-1].sequence_number
                    ]
            except Exception:
                # Best-effort placeholder preserved from the original code.
                output[segment.slug] = ['-,-']
        return output