Exemple #1
0
def generate_dataset(n_user, n_posts, n_comments, var):
    """Generate users with dummy posts and comments, then link them together.

    For every user a name is drawn from a ``DocumentGenerator`` and used to
    derive the username, nickname and e-mail. Each user receives about
    ``n_posts`` posts and ``n_comments`` comments; both counts are jittered
    independently by ``randint(-var, var)`` and never drop below zero.
    Finally every comment is attached to one randomly sampled post.

    Args:
        n_user: Number of users to create.
        n_posts: Baseline number of posts per user.
        n_comments: Baseline number of comments per user.
        var: Maximum absolute jitter applied to the per-user counts.

    Returns:
        A ``(users, posts, comments)`` tuple where ``posts`` and ``comments``
        are the flattened lists across all users. When no post was generated
        the comments are returned unattached.
    """
    gen = DocumentGenerator()
    users = []
    for _ in range(n_user):
        nickname = gen.name()
        user = User(username=nickname.lower().replace(' ', '_'),
                    nickname=nickname,
                    password=rand_passwd(),
                    email=get_email(nickname))

        # The jitter can push a small baseline below zero; a negative count
        # is meaningless for the generators, so clamp it.
        post_count = max(0, n_posts + randint(-var, var))
        comment_count = max(0, n_comments + randint(-var, var))

        user.posts = get_dummy_posts(n=post_count, gen=gen, author=user)
        user.comments = get_dummy_comments(n=comment_count,
                                           gen=gen,
                                           author=user)
        users.append(user)

    posts = [p for user in users for p in user.posts]
    comments = [c for user in users for c in user.comments]

    # Attach comments to posts. sample() raises ValueError on an empty
    # population, so there is nothing to attach to when no post exists.
    if posts:
        for comment in comments:
            post = sample(posts, 1)[0]
            post.comments.append(comment)
            comment.post = post
    return users, posts, comments
Exemple #2
0
        clean = [f.get() for f in futures]
        for c in clean:
            if c is not None:
                scraper_sessions[c.proxy] = c

        if len(scraper_sessions) > 0:
            print(len(scraper_sessions))




    c = 0
    while True:
        thread_pool = Pool(pool_size)
        futures = list()
        r = random.choice([gen.email(), gen.word()+gen.word(), gen.name()+gen.word()+gen.word()])
        #r = get_random_string(random.randrange(120, 500))
        request_data = {"doAuth": "1", "login": r.replace(" ", ""), "password": gen.word()+gen.word()+str(gen.small_int())}
        for i in range(pool_size):
            if proxy_index < len(proxies):
                proxy = proxies[proxy_index]
                proxy_index += 1
            else:
                proxy = proxies[0]
                proxy_index = 1
            if proxy in scraper_sessions:
                futures.append(thread_pool.apply_async(post_proxy, [url, proxy, request_data, proxies, (None, scraper_sessions[proxy])[cloudflare]]))


        clean = [f.get() for f in futures]
        for x in clean:
Exemple #3
0
    def handle(self, *args, **options):
        """Import images and findings from the per-site MariaDB instances.

        Clears the existing ``Site``, ``MedicalImage``, ``XrayAnalysisFinding``
        and ``PatientInfo`` rows, then for each of 10 sites connects to the
        ``medical_analysis`` database on port ``3600 + i`` and copies every
        ``upload_item`` row, together with its joined analysis findings, into
        the local models. A ``PatientInfo`` row with generated values is
        created for every imported image. The import stops at the first
        database error.

        Args:
            *args: Unused.
            **options: Must contain ``user`` and ``password``; the first
                element of each is used as the database credential.

        Returns:
            None. Nothing is deleted or imported when either credential is
            missing or empty.
        """
        users = options['user']
        passwords = options['password']
        # Guard before the destructive deletes below: with an empty credential
        # list the tables would be wiped and the import would then fail on [0].
        if not users or not passwords:
            return

        image_rec_count = 0
        finding_rec_count = 0
        Site.objects.all().delete()
        MedicalImage.truncate()
        XrayAnalysisFinding.truncate()
        PatientInfo.truncate()
        site_count = 10
        doc_gen = DocumentGenerator()
        hospitals = Hospital.objects.all()
        findings_query = 'SELECT f.name, f.value FROM upload_item as i INNER JOIN xray_analysis as a ON i.id = a.upload_item_id INNER JOIN xray_analysis_finding AS f ON a.id = f.analysis_id WHERE i.id = %s'

        for i in range(site_count):
            port = 3600 + i
            # Reset per iteration so ``finally`` never sees an unbound name or
            # the already-closed connection of the previous site.
            connection = None
            cursor = None
            try:
                hospital = random.choice(hospitals)
                site = Site.objects.create(
                    code=str(random.randint(10000, 30000)),
                    hospital=hospital)

                connection = mysql.connector.connect(host='10.60.3.4',
                                                     port=port,
                                                     database='medical_analysis',
                                                     user=users[0],
                                                     password=passwords[0])
                cursor = connection.cursor(dictionary=True)

                cursor.execute('SELECT * FROM upload_item')
                upload_items = cursor.fetchall()
                for item in upload_items:
                    medical_image = MedicalImage.objects.create(
                        image_date=item['image_date'],
                        image_path=item['image_path'],
                        image_name=item['name'],
                        image_size=item['size'],
                        image_type=item['type'],
                        site=site)
                    image_rec_count += 1

                    PatientInfo.objects.create(
                        date_acquired=item['image_date'],
                        name=doc_gen.name(),
                        identity_no=self.gen_ic(),
                        gender=random.choice([MALE, FEMALE]),
                        dob=random_timestamp(part='DATE'),
                        modality='CT',
                        medical_image=medical_image)

                    cursor.execute(findings_query, (item['id'],))
                    for finding in cursor.fetchall():
                        XrayAnalysisFinding.objects.create(
                            name=finding['name'],
                            value=finding['value'],
                            medical_image=medical_image)
                        finding_rec_count += 1
            except Error as e:
                print('Error reading data from Mariadb {}'.format(port), e)
                break
            finally:
                if connection is not None and connection.is_connected():
                    # Close the cursor before its connection, and make sure
                    # the connection is released even if that fails.
                    try:
                        if cursor is not None:
                            cursor.close()
                    finally:
                        connection.close()
                    print('Mariadb connection is closed')
            if image_rec_count > 0 and finding_rec_count > 0:
                print('{} / {} finished. {} image(s) added. {} finding(s) added.'.format(i+1, site_count, image_rec_count, finding_rec_count))

        if image_rec_count > 0 and finding_rec_count > 0:
            print('Done. Total {} image(s) added. Total {} finding(s) added.'.format(image_rec_count, finding_rec_count))
Exemple #4
0
    def handle(self, *args, **options):
        """Create demo patients, images and findings for every hospital.

        ``options['data_date'][0]`` selects what happens:

        * ``'reset'`` truncates the demo tables and returns;
        * ``'today'`` generates data dated with today's date;
        * any other value is parsed as ``YYYY-MM-DD`` and used as the date.

        Each hospital gets a ``Site`` (created with an unused random code if
        it has none) and between 9 and 30 patients. Every patient receives one
        ``MedicalImage`` and one ``XrayAnalysisFinding`` per entry in
        ``DISEASES``. Progress is printed after each hospital.
        """
        mode = options['data_date'][0]

        if mode == 'reset':
            XrayAnalysisFinding.truncate()
            MedicalImage.truncate()
            PatientInfo.truncate()
            AnalysisSetting.truncate()
            Site.truncate()
            print('demo data truncated')
            return

        if mode == 'today':
            data_date = date.today()
        else:
            from datetime import datetime
            data_date = datetime.strptime(mode, '%Y-%m-%d')

        images_added = 0
        findings_added = 0
        hospitals = Hospital.objects.all()
        total_sites = len(hospitals)
        name_source = DocumentGenerator()
        current_day = date.today()

        for done, hospital in enumerate(hospitals, start=1):
            site = Site.objects.filter(hospital=hospital).first()
            if site is None:
                # Keep drawing codes until one is not used by any site.
                while True:
                    candidate = str(random.randint(10000, 30000))
                    if Site.objects.filter(code=candidate).first() is None:
                        break
                site = Site.objects.create(code=candidate, hospital=hospital)

            patient_total = random.randint(9, 30)
            for _ in range(patient_total):
                dob = random_timestamp(part='DATE')
                # One year less if this year's birthday has not happened yet.
                birthday_passed = ((current_day.month, current_day.day) >=
                                   (dob.month, dob.day))
                age = current_day.year - dob.year - (0 if birthday_passed else 1)

                patient = PatientInfo.objects.create(
                    date_acquired=data_date,
                    name=name_source.name(),
                    identity_no=self.gen_ic(),
                    gender=random.choice([MALE, FEMALE]),
                    dob=dob,
                    age=age,
                    modality='CT',
                    site=site)

                image = MedicalImage.objects.create(
                    image_date=data_date,
                    image_path='/demo_path',
                    image_name='demo',
                    site=site,
                    patient_info=patient)
                images_added += 1

                for disease in DISEASES:
                    XrayAnalysisFinding.objects.create(
                        name=disease,
                        value=random.randint(0, 100),
                        medical_image=image)
                    findings_added += 1

            # Reported after every hospital, whether or not rows were added.
            progress = '{} / {} site(s) finished. {} image(s) added. {} finding(s) added.'
            print(progress.format(done, total_sites, images_added,
                                  findings_added))

        if images_added > 0 and findings_added > 0:
            summary = 'Done. Total {} image(s) added. Total {} finding(s) added.'
            print(summary.format(images_added, findings_added))